xref: /linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision ef9226cd56b718c79184a3466d32984a51cb449c)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMAN):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69 
/* DMA pool block size: 256 bytes */
#define GAUDI_DMA_POOL_BLK_SIZE		0x100

/* Reset / queue-test timing */
#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2 seconds */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1 millisecond */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200 milliseconds */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100 milliseconds */

/* PLDM variants of the waits/timeouts (all longer than the regular ones) */
#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1 second */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20 seconds */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1 second */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)

/* Firmware / boot handshake timeouts */
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4 seconds */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4 seconds */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15 seconds */

/* Fence value used with QMAN0 */
#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

/* Command buffer pool: 512 buffers of 128KB each */
#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

/* Sizes of the error-cause string tables defined in this file */
#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
#define GAUDI_NUM_OF_QM_ERR_CAUSE	16
#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

/* Arbiter watchdog timeout: 8 seconds */
#define GAUDI_ARB_WDT_TIMEOUT		0xEE6B27FF

/* HBM scrubbing timeout: 1 second */
#define HBM_SCRUBBING_TIMEOUT_US	1000000

/* Room for "0b" + 32 binary digits + terminating NUL */
#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256
109 
110 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
111 	GAUDI_QUEUE_ID_DMA_0_0,
112 	GAUDI_QUEUE_ID_DMA_0_1,
113 	GAUDI_QUEUE_ID_DMA_0_2,
114 	GAUDI_QUEUE_ID_DMA_0_3,
115 	GAUDI_QUEUE_ID_DMA_1_0,
116 	GAUDI_QUEUE_ID_DMA_1_1,
117 	GAUDI_QUEUE_ID_DMA_1_2,
118 	GAUDI_QUEUE_ID_DMA_1_3
119 };
120 
121 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
122 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
123 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
124 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
125 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
126 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
127 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
128 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
129 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
130 };
131 
132 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
133 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
134 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
135 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
136 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
137 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
138 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
139 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
140 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
141 };
142 
143 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
144 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
145 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
146 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
147 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
148 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
149 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
150 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
151 	[PACKET_FENCE]		= sizeof(struct packet_fence),
152 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
153 	[PACKET_NOP]		= sizeof(struct packet_nop),
154 	[PACKET_STOP]		= sizeof(struct packet_stop),
155 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
156 	[PACKET_WAIT]		= sizeof(struct packet_wait),
157 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
158 };
159 
160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 	switch (id) {
163 	case PACKET_WREG_32:
164 	case PACKET_WREG_BULK:
165 	case PACKET_MSG_LONG:
166 	case PACKET_MSG_SHORT:
167 	case PACKET_CP_DMA:
168 	case PACKET_REPEAT:
169 	case PACKET_MSG_PROT:
170 	case PACKET_FENCE:
171 	case PACKET_LIN_DMA:
172 	case PACKET_NOP:
173 	case PACKET_STOP:
174 	case PACKET_ARB_POINT:
175 	case PACKET_WAIT:
176 	case PACKET_LOAD_AND_EXE:
177 		return true;
178 	default:
179 		return false;
180 	}
181 }
182 
183 static const char * const
184 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
185 	"tpc_address_exceed_slm",
186 	"tpc_div_by_0",
187 	"tpc_spu_mac_overflow",
188 	"tpc_spu_addsub_overflow",
189 	"tpc_spu_abs_overflow",
190 	"tpc_spu_fp_dst_nan_inf",
191 	"tpc_spu_fp_dst_denorm",
192 	"tpc_vpu_mac_overflow",
193 	"tpc_vpu_addsub_overflow",
194 	"tpc_vpu_abs_overflow",
195 	"tpc_vpu_fp_dst_nan_inf",
196 	"tpc_vpu_fp_dst_denorm",
197 	"tpc_assertions",
198 	"tpc_illegal_instruction",
199 	"tpc_pc_wrap_around",
200 	"tpc_qm_sw_err",
201 	"tpc_hbw_rresp_err",
202 	"tpc_hbw_bresp_err",
203 	"tpc_lbw_rresp_err",
204 	"tpc_lbw_bresp_err"
205 };
206 
207 static const char * const
208 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209 	"PQ AXI HBW error",
210 	"CQ AXI HBW error",
211 	"CP AXI HBW error",
212 	"CP error due to undefined OPCODE",
213 	"CP encountered STOP OPCODE",
214 	"CP AXI LBW error",
215 	"CP WRREG32 or WRBULK returned error",
216 	"N/A",
217 	"FENCE 0 inc over max value and clipped",
218 	"FENCE 1 inc over max value and clipped",
219 	"FENCE 2 inc over max value and clipped",
220 	"FENCE 3 inc over max value and clipped",
221 	"FENCE 0 dec under min value and clipped",
222 	"FENCE 1 dec under min value and clipped",
223 	"FENCE 2 dec under min value and clipped",
224 	"FENCE 3 dec under min value and clipped"
225 };
226 
227 static const char * const
228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
229 	"Choice push while full error",
230 	"Choice Q watchdog error",
231 	"MSG AXI LBW returned with error"
232 };
233 
234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
235 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
236 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
237 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
243 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
244 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
245 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
246 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
348 };
349 
350 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
351 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
353 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
357 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
360 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
361 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
377 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
378 };
379 
380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
381 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
385 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
386 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
387 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
388 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
389 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
390 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
391 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
392 };
393 
394 static s64 gaudi_state_dump_specs_props[] = {
395 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
396 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
397 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
398 	[SP_MON_OBJ_WR_ADDR_LOW] =
399 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
400 	[SP_MON_OBJ_WR_ADDR_HIGH] =
401 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
402 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
403 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
404 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
405 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
406 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
407 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
408 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
409 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
410 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
411 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
412 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
413 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
414 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
415 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
416 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
417 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
418 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
419 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
420 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
421 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
422 	[SP_FENCE0_CNT_OFFSET] =
423 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
424 	[SP_FENCE0_RDATA_OFFSET] =
425 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
426 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_NUM_CORES] = 1,
428 };
429 
430 static const int gaudi_queue_id_to_engine_id[] = {
431 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
432 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
433 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
434 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
435 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
436 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
437 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
438 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
439 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
440 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
441 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
442 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
443 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
444 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
445 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
446 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
447 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
448 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
449 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
450 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
451 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
452 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
453 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
454 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
455 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
456 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
457 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
458 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
459 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
460 };
461 
/*
 * NULL-terminated list of sync manager names.
 *
 * The entries are listed in the reverse of the h/w indexing order: in the h/w,
 * SYNC_MGR_W_S is index 0, SYNC_MGR_E_S is index 1, and so on.
 */
static const char * const gaudi_sync_manager_names[] = {
	[0] = "SYNC_MGR_E_N",
	[1] = "SYNC_MGR_W_N",
	[2] = "SYNC_MGR_E_S",
	[3] = "SYNC_MGR_W_S",
	[4] = NULL,
};
472 
473 struct ecc_info_extract_params {
474 	u64 block_address;
475 	u32 num_memories;
476 	bool derr;
477 };
478 
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480 								u64 phys_addr);
481 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482 					struct hl_cs_job *job);
483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484 					u32 size, u64 val);
485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486 					u32 num_regs, u32 val);
487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488 				u32 tpc_id);
489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490 static int gaudi_cpucp_info_get(struct hl_device *hdev);
491 static void gaudi_disable_clock_gating(struct hl_device *hdev);
492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494 				u32 size, bool eb);
495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496 				struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 		return HL_COLLECTIVE_MASTER;
502 
503 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 		return HL_COLLECTIVE_SLAVE;
506 
507 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 		return HL_COLLECTIVE_SLAVE;
510 
511 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 		return HL_COLLECTIVE_SLAVE;
514 
515 	return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517 
518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 	struct asic_fixed_properties *prop = &hdev->asic_prop;
521 
522 	if (hdev->card_type == cpucp_card_type_pmc) {
523 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524 
525 		if (prop->fw_security_enabled)
526 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 		else
528 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 	} else {
530 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 	}
533 }
534 
535 static int gaudi_set_fixed_properties(struct hl_device *hdev)
536 {
537 	struct asic_fixed_properties *prop = &hdev->asic_prop;
538 	u32 num_sync_stream_queues = 0;
539 	int i;
540 
541 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542 	prop->hw_queues_props = kcalloc(prop->max_queues,
543 			sizeof(struct hw_queue_properties),
544 			GFP_KERNEL);
545 
546 	if (!prop->hw_queues_props)
547 		return -ENOMEM;
548 
549 	for (i = 0 ; i < prop->max_queues ; i++) {
550 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552 			prop->hw_queues_props[i].driver_only = 0;
553 			prop->hw_queues_props[i].supports_sync_stream = 1;
554 			prop->hw_queues_props[i].cb_alloc_flags =
555 				CB_ALLOC_KERNEL;
556 			num_sync_stream_queues++;
557 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559 			prop->hw_queues_props[i].driver_only = 1;
560 			prop->hw_queues_props[i].supports_sync_stream = 0;
561 			prop->hw_queues_props[i].cb_alloc_flags =
562 				CB_ALLOC_KERNEL;
563 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565 			prop->hw_queues_props[i].driver_only = 0;
566 			prop->hw_queues_props[i].supports_sync_stream = 0;
567 			prop->hw_queues_props[i].cb_alloc_flags =
568 				CB_ALLOC_USER;
569 
570 		}
571 		prop->hw_queues_props[i].collective_mode =
572 						get_collective_mode(hdev, i);
573 	}
574 
575 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576 	prop->cfg_base_address = CFG_BASE;
577 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578 	prop->host_base_address = HOST_PHYS_BASE;
579 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
582 	prop->collective_first_sob = 0;
583 	prop->collective_first_mon = 0;
584 
585 	/* 2 SOBs per internal queue stream are reserved for collective */
586 	prop->sync_stream_first_sob =
587 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588 			* QMAN_STREAMS * HL_RSVD_SOBS;
589 
590 	/* 1 monitor per internal queue stream are reserved for collective
591 	 * 2 monitors per external queue stream are reserved for collective
592 	 */
593 	prop->sync_stream_first_mon =
594 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595 			(NUMBER_OF_EXT_HW_QUEUES * 2);
596 
597 	prop->dram_base_address = DRAM_PHYS_BASE;
598 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
599 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601 
602 	prop->sram_base_address = SRAM_BASE_ADDR;
603 	prop->sram_size = SRAM_SIZE;
604 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605 	prop->sram_user_base_address =
606 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607 
608 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610 
611 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612 	if (hdev->pldm)
613 		prop->mmu_pgt_size = 0x800000; /* 8MB */
614 	else
615 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616 	prop->mmu_pte_size = HL_PTE_SIZE;
617 	prop->dram_page_size = PAGE_SIZE_2MB;
618 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
619 	prop->dram_supports_virtual_memory = false;
620 
621 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
622 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
623 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
624 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
625 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
626 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
627 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
628 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
629 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
630 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
631 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
632 	prop->pmmu.end_addr =
633 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
634 	prop->pmmu.page_size = PAGE_SIZE_4KB;
635 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
636 	prop->pmmu.last_mask = LAST_MASK;
637 	/* TODO: will be duplicated until implementing per-MMU props */
638 	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
639 	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
640 
641 	/* PMMU and HPMMU are the same except of page size */
642 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
643 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
644 
645 	/* shifts and masks are the same in PMMU and DMMU */
646 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
647 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
648 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
649 	prop->dmmu.page_size = PAGE_SIZE_2MB;
650 	prop->dmmu.pgt_size = prop->mmu_pgt_size;
651 
652 	prop->cfg_size = CFG_SIZE;
653 	prop->max_asid = MAX_ASID;
654 	prop->num_of_events = GAUDI_EVENT_SIZE;
655 	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
656 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
657 
658 	set_default_power_values(hdev);
659 
660 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
661 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
662 
663 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
664 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
665 
666 	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
667 					CARD_NAME_MAX_LEN);
668 
669 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
670 
671 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
672 			prop->sync_stream_first_sob +
673 			(num_sync_stream_queues * HL_RSVD_SOBS);
674 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
675 			prop->sync_stream_first_mon +
676 			(num_sync_stream_queues * HL_RSVD_MONS);
677 
678 	prop->first_available_user_interrupt = USHRT_MAX;
679 	prop->tpc_interrupt_id = USHRT_MAX;
680 
681 	/* single msi */
682 	prop->eq_interrupt_id = 0;
683 
684 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
685 		prop->first_available_cq[i] = USHRT_MAX;
686 
687 	prop->fw_cpu_boot_dev_sts0_valid = false;
688 	prop->fw_cpu_boot_dev_sts1_valid = false;
689 	prop->hard_reset_done_by_fw = false;
690 	prop->gic_interrupts_enable = true;
691 
692 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
693 
694 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
695 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
696 
697 	prop->use_get_power_for_reset_history = true;
698 
699 	prop->configurable_stop_on_err = true;
700 
701 	prop->set_max_power_on_device_init = true;
702 
703 	prop->dma_mask = 48;
704 
705 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
706 
707 	return 0;
708 }
709 
710 static int gaudi_pci_bars_map(struct hl_device *hdev)
711 {
712 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
713 	bool is_wc[3] = {false, false, true};
714 	int rc;
715 
716 	rc = hl_pci_bars_map(hdev, name, is_wc);
717 	if (rc)
718 		return rc;
719 
720 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
722 
723 	return 0;
724 }
725 
726 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727 {
728 	struct gaudi_device *gaudi = hdev->asic_specific;
729 	struct hl_inbound_pci_region pci_region;
730 	u64 old_addr = addr;
731 	int rc;
732 
733 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734 		return old_addr;
735 
736 	if (hdev->asic_prop.iatu_done_by_fw)
737 		return U64_MAX;
738 
739 	/* Inbound Region 2 - Bar 4 - Point to HBM */
740 	pci_region.mode = PCI_BAR_MATCH_MODE;
741 	pci_region.bar = HBM_BAR_ID;
742 	pci_region.addr = addr;
743 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
744 	if (rc)
745 		return U64_MAX;
746 
747 	if (gaudi) {
748 		old_addr = gaudi->hbm_bar_cur_addr;
749 		gaudi->hbm_bar_cur_addr = addr;
750 	}
751 
752 	return old_addr;
753 }
754 
755 static int gaudi_init_iatu(struct hl_device *hdev)
756 {
757 	struct hl_inbound_pci_region inbound_region;
758 	struct hl_outbound_pci_region outbound_region;
759 	int rc;
760 
761 	if (hdev->asic_prop.iatu_done_by_fw)
762 		return 0;
763 
764 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 	inbound_region.mode = PCI_BAR_MATCH_MODE;
766 	inbound_region.bar = SRAM_BAR_ID;
767 	inbound_region.addr = SRAM_BASE_ADDR;
768 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769 	if (rc)
770 		goto done;
771 
772 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 	inbound_region.mode = PCI_BAR_MATCH_MODE;
774 	inbound_region.bar = CFG_BAR_ID;
775 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
776 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777 	if (rc)
778 		goto done;
779 
780 	/* Inbound Region 2 - Bar 4 - Point to HBM */
781 	inbound_region.mode = PCI_BAR_MATCH_MODE;
782 	inbound_region.bar = HBM_BAR_ID;
783 	inbound_region.addr = DRAM_PHYS_BASE;
784 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785 	if (rc)
786 		goto done;
787 
788 	/* Outbound Region 0 - Point to Host */
789 	outbound_region.addr = HOST_PHYS_BASE;
790 	outbound_region.size = HOST_PHYS_SIZE;
791 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792 
793 done:
794 	return rc;
795 }
796 
797 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
798 {
799 	return RREG32(mmHW_STATE);
800 }
801 
802 static int gaudi_early_init(struct hl_device *hdev)
803 {
804 	struct asic_fixed_properties *prop = &hdev->asic_prop;
805 	struct pci_dev *pdev = hdev->pdev;
806 	resource_size_t pci_bar_size;
807 	u32 fw_boot_status;
808 	int rc;
809 
810 	rc = gaudi_set_fixed_properties(hdev);
811 	if (rc) {
812 		dev_err(hdev->dev, "Failed setting fixed properties\n");
813 		return rc;
814 	}
815 
816 	/* Check BAR sizes */
817 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
818 
819 	if (pci_bar_size != SRAM_BAR_SIZE) {
820 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
821 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
822 		rc = -ENODEV;
823 		goto free_queue_props;
824 	}
825 
826 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
827 
828 	if (pci_bar_size != CFG_BAR_SIZE) {
829 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
830 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
831 		rc = -ENODEV;
832 		goto free_queue_props;
833 	}
834 
835 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
836 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
837 
838 	/* If FW security is enabled at this point it means no access to ELBI */
839 	if (hdev->asic_prop.fw_security_enabled) {
840 		hdev->asic_prop.iatu_done_by_fw = true;
841 
842 		/*
843 		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
844 		 * decision can only be taken based on PCI ID security.
845 		 */
846 		hdev->asic_prop.gic_interrupts_enable = false;
847 		goto pci_init;
848 	}
849 
850 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
851 				&fw_boot_status);
852 	if (rc)
853 		goto free_queue_props;
854 
855 	/* Check whether FW is configuring iATU */
856 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
857 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
858 		hdev->asic_prop.iatu_done_by_fw = true;
859 
860 pci_init:
861 	rc = hl_pci_init(hdev);
862 	if (rc)
863 		goto free_queue_props;
864 
865 	/* Before continuing in the initialization, we need to read the preboot
866 	 * version to determine whether we run with a security-enabled firmware
867 	 */
868 	rc = hl_fw_read_preboot_status(hdev);
869 	if (rc) {
870 		if (hdev->reset_on_preboot_fail)
871 			/* we are already on failure flow, so don't check if hw_fini fails. */
872 			hdev->asic_funcs->hw_fini(hdev, true, false);
873 		goto pci_fini;
874 	}
875 
876 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
877 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
878 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
879 		if (rc) {
880 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
881 			goto pci_fini;
882 		}
883 	}
884 
885 	return 0;
886 
887 pci_fini:
888 	hl_pci_fini(hdev);
889 free_queue_props:
890 	kfree(hdev->asic_prop.hw_queues_props);
891 	return rc;
892 }
893 
894 static int gaudi_early_fini(struct hl_device *hdev)
895 {
896 	kfree(hdev->asic_prop.hw_queues_props);
897 	hl_pci_fini(hdev);
898 
899 	return 0;
900 }
901 
902 /**
903  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904  *
905  * @hdev: pointer to hl_device structure
906  *
907  */
908 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909 {
910 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911 	struct asic_fixed_properties *prop = &hdev->asic_prop;
912 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913 	int rc;
914 
915 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
916 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917 		struct gaudi_device *gaudi = hdev->asic_specific;
918 
919 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920 			return 0;
921 
922 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923 
924 		if (rc)
925 			return rc;
926 
927 		freq = pll_freq_arr[2];
928 	} else {
929 		/* Backward compatibility */
930 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932 		nr = RREG32(mmPSOC_CPU_PLL_NR);
933 		nf = RREG32(mmPSOC_CPU_PLL_NF);
934 		od = RREG32(mmPSOC_CPU_PLL_OD);
935 
936 		if (div_sel == DIV_SEL_REF_CLK ||
937 				div_sel == DIV_SEL_DIVIDED_REF) {
938 			if (div_sel == DIV_SEL_REF_CLK)
939 				freq = PLL_REF_CLK;
940 			else
941 				freq = PLL_REF_CLK / (div_fctr + 1);
942 		} else if (div_sel == DIV_SEL_PLL_CLK ||
943 			div_sel == DIV_SEL_DIVIDED_PLL) {
944 			pll_clk = PLL_REF_CLK * (nf + 1) /
945 					((nr + 1) * (od + 1));
946 			if (div_sel == DIV_SEL_PLL_CLK)
947 				freq = pll_clk;
948 			else
949 				freq = pll_clk / (div_fctr + 1);
950 		} else {
951 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952 			freq = 0;
953 		}
954 	}
955 
956 	prop->psoc_timestamp_frequency = freq;
957 	prop->psoc_pci_pll_nr = nr;
958 	prop->psoc_pci_pll_nf = nf;
959 	prop->psoc_pci_pll_od = od;
960 	prop->psoc_pci_pll_div_factor = div_fctr;
961 
962 	return 0;
963 }
964 
965 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
966 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
967 {
968 	struct asic_fixed_properties *prop = &hdev->asic_prop;
969 	struct packet_lin_dma *init_tpc_mem_pkt;
970 	struct hl_cs_job *job;
971 	struct hl_cb *cb;
972 	u64 dst_addr;
973 	u32 cb_size, ctl;
974 	u8 tpc_id;
975 	int rc;
976 
977 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
978 	if (!cb)
979 		return -EFAULT;
980 
981 	init_tpc_mem_pkt = cb->kernel_address;
982 	cb_size = sizeof(*init_tpc_mem_pkt);
983 	memset(init_tpc_mem_pkt, 0, cb_size);
984 
985 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
986 
987 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
988 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
989 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
990 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
991 
992 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
993 
994 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
995 
996 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
997 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
998 				round_up(prop->sram_user_base_address, SZ_8K));
999 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1000 
1001 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1002 	if (!job) {
1003 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1004 		rc = -ENOMEM;
1005 		goto release_cb;
1006 	}
1007 
1008 	job->id = 0;
1009 	job->user_cb = cb;
1010 	atomic_inc(&job->user_cb->cs_cnt);
1011 	job->user_cb_size = cb_size;
1012 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1013 	job->patched_cb = job->user_cb;
1014 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1015 
1016 	hl_debugfs_add_job(hdev, job);
1017 
1018 	rc = gaudi_send_job_on_qman0(hdev, job);
1019 
1020 	if (rc)
1021 		goto free_job;
1022 
1023 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1024 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1025 		if (rc)
1026 			break;
1027 	}
1028 
1029 free_job:
1030 	hl_userptr_delete_list(hdev, &job->userptr_list);
1031 	hl_debugfs_remove_job(hdev, job);
1032 	kfree(job);
1033 	atomic_dec(&cb->cs_cnt);
1034 
1035 release_cb:
1036 	hl_cb_put(cb);
1037 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1038 
1039 	return rc;
1040 }
1041 
1042 /*
1043  * gaudi_init_tpc_mem() - Initialize TPC memories.
1044  * @hdev: Pointer to hl_device structure.
1045  *
1046  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047  *
1048  * Return: 0 for success, negative value for error.
1049  */
1050 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051 {
1052 	const struct firmware *fw;
1053 	size_t fw_size;
1054 	void *cpu_addr;
1055 	dma_addr_t dma_handle;
1056 	int rc, count = 5;
1057 
1058 again:
1059 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060 	if (rc == -EINTR && count-- > 0) {
1061 		msleep(50);
1062 		goto again;
1063 	}
1064 
1065 	if (rc) {
1066 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067 				GAUDI_TPC_FW_FILE);
1068 		goto out;
1069 	}
1070 
1071 	fw_size = fw->size;
1072 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073 	if (!cpu_addr) {
1074 		dev_err(hdev->dev,
1075 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1076 			fw_size);
1077 		rc = -ENOMEM;
1078 		goto out;
1079 	}
1080 
1081 	memcpy(cpu_addr, fw->data, fw_size);
1082 
1083 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084 
1085 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086 
1087 out:
1088 	release_firmware(fw);
1089 	return rc;
1090 }
1091 
1092 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093 {
1094 	struct gaudi_device *gaudi = hdev->asic_specific;
1095 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096 	struct hl_hw_queue *q;
1097 	u32 i, sob_id, sob_group_id, queue_id;
1098 
1099 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1100 	sob_group_id =
1101 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103 
1104 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1107 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1108 	}
1109 
1110 	/* Both DMA5 and TPC7 use the same resources since only a single
1111 	 * engine need to participate in the reduction process
1112 	 */
1113 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114 	q = &hdev->kernel_queues[queue_id];
1115 	q->sync_stream_prop.collective_sob_id =
1116 			sob_id + NIC_NUMBER_OF_ENGINES;
1117 
1118 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119 	q = &hdev->kernel_queues[queue_id];
1120 	q->sync_stream_prop.collective_sob_id =
1121 			sob_id + NIC_NUMBER_OF_ENGINES;
1122 }
1123 
1124 static void gaudi_sob_group_hw_reset(struct kref *ref)
1125 {
1126 	struct gaudi_hw_sob_group *hw_sob_group =
1127 		container_of(ref, struct gaudi_hw_sob_group, kref);
1128 	struct hl_device *hdev = hw_sob_group->hdev;
1129 	int i;
1130 
1131 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134 
1135 	kref_init(&hw_sob_group->kref);
1136 }
1137 
1138 static void gaudi_sob_group_reset_error(struct kref *ref)
1139 {
1140 	struct gaudi_hw_sob_group *hw_sob_group =
1141 		container_of(ref, struct gaudi_hw_sob_group, kref);
1142 	struct hl_device *hdev = hw_sob_group->hdev;
1143 
1144 	dev_crit(hdev->dev,
1145 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1146 		hw_sob_group->base_sob_id);
1147 }
1148 
1149 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1150 {
1151 	struct gaudi_collective_properties *prop;
1152 	int i;
1153 
1154 	prop = &gaudi->collective_props;
1155 
1156 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1157 
1158 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1159 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1160 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162 	/* Set collective engine bit */
1163 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1164 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1165 }
1166 
1167 static int gaudi_collective_init(struct hl_device *hdev)
1168 {
1169 	u32 i, sob_id, reserved_sobs_per_group;
1170 	struct gaudi_collective_properties *prop;
1171 	struct gaudi_device *gaudi;
1172 
1173 	gaudi = hdev->asic_specific;
1174 	prop = &gaudi->collective_props;
1175 	sob_id = hdev->asic_prop.collective_first_sob;
1176 
1177 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 	reserved_sobs_per_group =
1179 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180 
1181 	/* Init SOB groups */
1182 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183 		prop->hw_sob_group[i].hdev = hdev;
1184 		prop->hw_sob_group[i].base_sob_id = sob_id;
1185 		sob_id += reserved_sobs_per_group;
1186 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187 	}
1188 
1189 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1190 		prop->next_sob_group_val[i] = 1;
1191 		prop->curr_sob_group_idx[i] = 0;
1192 		gaudi_collective_map_sobs(hdev, i);
1193 	}
1194 
1195 	gaudi_collective_mstr_sob_mask_set(gaudi);
1196 
1197 	return 0;
1198 }
1199 
1200 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1201 {
1202 	struct gaudi_device *gaudi = hdev->asic_specific;
1203 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1204 
1205 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1206 					gaudi_sob_group_hw_reset);
1207 }
1208 
1209 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1210 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1211 {
1212 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1213 	struct gaudi_collective_properties *cprop;
1214 	struct hl_gen_wait_properties wait_prop;
1215 	struct hl_sync_stream_properties *prop;
1216 	struct gaudi_device *gaudi;
1217 
1218 	gaudi = hdev->asic_specific;
1219 	cprop = &gaudi->collective_props;
1220 	queue_id = job->hw_queue_id;
1221 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1222 
1223 	master_sob_base =
1224 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1225 	master_monitor = prop->collective_mstr_mon_id[0];
1226 
1227 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1228 
1229 	dev_dbg(hdev->dev,
1230 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1231 		master_sob_base, cprop->mstr_sob_mask[0],
1232 		cprop->next_sob_group_val[stream],
1233 		master_monitor, queue_id);
1234 
1235 	wait_prop.data = (void *) job->patched_cb;
1236 	wait_prop.sob_base = master_sob_base;
1237 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1238 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1239 	wait_prop.mon_id = master_monitor;
1240 	wait_prop.q_idx = queue_id;
1241 	wait_prop.size = cb_size;
1242 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1243 
1244 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1245 	master_monitor = prop->collective_mstr_mon_id[1];
1246 
1247 	dev_dbg(hdev->dev,
1248 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1249 		master_sob_base, cprop->mstr_sob_mask[1],
1250 		cprop->next_sob_group_val[stream],
1251 		master_monitor, queue_id);
1252 
1253 	wait_prop.sob_base = master_sob_base;
1254 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1255 	wait_prop.mon_id = master_monitor;
1256 	wait_prop.size = cb_size;
1257 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1258 }
1259 
1260 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1261 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1262 {
1263 	struct hl_gen_wait_properties wait_prop;
1264 	struct hl_sync_stream_properties *prop;
1265 	u32 queue_id, cb_size = 0;
1266 
1267 	queue_id = job->hw_queue_id;
1268 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1269 
1270 	if (job->cs->encaps_signals) {
1271 		/* use the encaps signal handle store earlier in the flow
1272 		 * and set the SOB information from the encaps
1273 		 * signals handle
1274 		 */
1275 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1276 						cs_cmpl);
1277 
1278 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1279 				job->cs->sequence,
1280 				cs_cmpl->hw_sob->sob_id,
1281 				cs_cmpl->sob_val);
1282 	}
1283 
1284 	/* Add to wait CBs using slave monitor */
1285 	wait_prop.data = (void *) job->user_cb;
1286 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1287 	wait_prop.sob_mask = 0x1;
1288 	wait_prop.sob_val = cs_cmpl->sob_val;
1289 	wait_prop.mon_id = prop->collective_slave_mon_id;
1290 	wait_prop.q_idx = queue_id;
1291 	wait_prop.size = cb_size;
1292 
1293 	dev_dbg(hdev->dev,
1294 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1295 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1296 		prop->collective_slave_mon_id, queue_id);
1297 
1298 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1299 
1300 	dev_dbg(hdev->dev,
1301 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1302 		prop->collective_sob_id, queue_id);
1303 
1304 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1305 			prop->collective_sob_id, cb_size, false);
1306 }
1307 
1308 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1309 {
1310 	struct hl_cs_compl *signal_cs_cmpl =
1311 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1312 	struct hl_cs_compl *cs_cmpl =
1313 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1314 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1315 	struct gaudi_collective_properties *cprop;
1316 	u32 stream, queue_id, sob_group_offset;
1317 	struct gaudi_device *gaudi;
1318 	struct hl_device *hdev;
1319 	struct hl_cs_job *job;
1320 	struct hl_ctx *ctx;
1321 
1322 	ctx = cs->ctx;
1323 	hdev = ctx->hdev;
1324 	gaudi = hdev->asic_specific;
1325 	cprop = &gaudi->collective_props;
1326 
1327 	if (cs->encaps_signals) {
1328 		cs_cmpl->hw_sob = handle->hw_sob;
1329 		/* at this checkpoint we only need the hw_sob pointer
1330 		 * for the completion check before start going over the jobs
1331 		 * of the master/slaves, the sob_value will be taken later on
1332 		 * in gaudi_collective_slave_init_job depends on each
1333 		 * job wait offset value.
1334 		 */
1335 		cs_cmpl->sob_val = 0;
1336 	} else {
1337 		/* copy the SOB id and value of the signal CS */
1338 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1339 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1340 	}
1341 
1342 	/* check again if the signal cs already completed.
1343 	 * if yes then don't send any wait cs since the hw_sob
1344 	 * could be in reset already. if signal is not completed
1345 	 * then get refcount to hw_sob to prevent resetting the sob
1346 	 * while wait cs is not submitted.
1347 	 * note that this check is protected by two locks,
1348 	 * hw queue lock and completion object lock,
1349 	 * and the same completion object lock also protects
1350 	 * the hw_sob reset handler function.
1351 	 * The hw_queue lock prevent out of sync of hw_sob
1352 	 * refcount value, changed by signal/wait flows.
1353 	 */
1354 	spin_lock(&signal_cs_cmpl->lock);
1355 
1356 	if (completion_done(&cs->signal_fence->completion)) {
1357 		spin_unlock(&signal_cs_cmpl->lock);
1358 		return -EINVAL;
1359 	}
1360 	/* Increment kref since all slave queues are now waiting on it */
1361 	kref_get(&cs_cmpl->hw_sob->kref);
1362 
1363 	spin_unlock(&signal_cs_cmpl->lock);
1364 
1365 	/* Calculate the stream from collective master queue (1st job) */
1366 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1367 	stream = job->hw_queue_id % 4;
1368 	sob_group_offset =
1369 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1370 
1371 	list_for_each_entry(job, &cs->job_list, cs_node) {
1372 		queue_id = job->hw_queue_id;
1373 
1374 		if (hdev->kernel_queues[queue_id].collective_mode ==
1375 				HL_COLLECTIVE_MASTER)
1376 			gaudi_collective_master_init_job(hdev, job, stream,
1377 						sob_group_offset);
1378 		else
1379 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1380 	}
1381 
1382 	cs_cmpl->sob_group = sob_group_offset;
1383 
1384 	/* Handle sob group kref and wraparound */
1385 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1386 	cprop->next_sob_group_val[stream]++;
1387 
1388 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1389 		/*
1390 		 * Decrement as we reached the max value.
1391 		 * The release function won't be called here as we've
1392 		 * just incremented the refcount.
1393 		 */
1394 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1395 				gaudi_sob_group_reset_error);
1396 		cprop->next_sob_group_val[stream] = 1;
1397 		/* only two SOBs are currently in use */
1398 		cprop->curr_sob_group_idx[stream] =
1399 			(cprop->curr_sob_group_idx[stream] + 1) &
1400 							(HL_RSVD_SOBS - 1);
1401 
1402 		gaudi_collective_map_sobs(hdev, stream);
1403 
1404 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1405 				cprop->curr_sob_group_idx[stream], stream);
1406 	}
1407 
1408 	mb();
1409 	hl_fence_put(cs->signal_fence);
1410 	cs->signal_fence = NULL;
1411 
1412 	return 0;
1413 }
1414 
1415 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416 {
1417 	u32 cacheline_end, additional_commands;
1418 
1419 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1421 
1422 	if (user_cb_size + additional_commands > cacheline_end)
1423 		return cacheline_end - user_cb_size + additional_commands;
1424 	else
1425 		return additional_commands;
1426 }
1427 
1428 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1429 		struct hl_ctx *ctx, struct hl_cs *cs,
1430 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1431 		u32 encaps_signal_offset)
1432 {
1433 	struct hw_queue_properties *hw_queue_prop;
1434 	struct hl_cs_counters_atomic *cntr;
1435 	struct hl_cs_job *job;
1436 	struct hl_cb *cb;
1437 	u32 cb_size;
1438 	bool patched_cb;
1439 
1440 	cntr = &hdev->aggregated_cs_counters;
1441 
1442 	if (mode == HL_COLLECTIVE_MASTER) {
1443 		/* CB size of collective master queue contains
1444 		 * 4 msg short packets for monitor 1 configuration
1445 		 * 1 fence packet
1446 		 * 4 msg short packets for monitor 2 configuration
1447 		 * 1 fence packet
1448 		 * 2 msg prot packets for completion and MSI
1449 		 */
1450 		cb_size = sizeof(struct packet_msg_short) * 8 +
1451 				sizeof(struct packet_fence) * 2 +
1452 				sizeof(struct packet_msg_prot) * 2;
1453 		patched_cb = true;
1454 	} else {
1455 		/* CB size of collective slave queues contains
1456 		 * 4 msg short packets for monitor configuration
1457 		 * 1 fence packet
1458 		 * 1 additional msg short packet for sob signal
1459 		 */
1460 		cb_size = sizeof(struct packet_msg_short) * 5 +
1461 				sizeof(struct packet_fence);
1462 		patched_cb = false;
1463 	}
1464 
1465 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1466 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1467 	if (!job) {
1468 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1469 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1470 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1471 		return -ENOMEM;
1472 	}
1473 
1474 	/* Allocate internal mapped CB for non patched CBs */
1475 	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1476 	if (!cb) {
1477 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1478 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1479 		kfree(job);
1480 		return -EFAULT;
1481 	}
1482 
1483 	job->id = 0;
1484 	job->cs = cs;
1485 	job->user_cb = cb;
1486 	atomic_inc(&job->user_cb->cs_cnt);
1487 	job->user_cb_size = cb_size;
1488 	job->hw_queue_id = queue_id;
1489 
1490 	/* since its guaranteed to have only one chunk in the collective wait
1491 	 * cs, we can use this chunk to set the encapsulated signal offset
1492 	 * in the jobs.
1493 	 */
1494 	if (cs->encaps_signals)
1495 		job->encaps_sig_wait_offset = encaps_signal_offset;
1496 
1497 	/*
1498 	 * No need in parsing, user CB is the patched CB.
1499 	 * We call hl_cb_destroy() out of two reasons - we don't need
1500 	 * the CB in the CB idr anymore and to decrement its refcount as
1501 	 * it was incremented inside hl_cb_kernel_create().
1502 	 */
1503 	if (patched_cb)
1504 		job->patched_cb = job->user_cb;
1505 	else
1506 		job->patched_cb = NULL;
1507 
1508 	job->job_cb_size = job->user_cb_size;
1509 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1510 
1511 	/* increment refcount as for external queues we get completion */
1512 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1513 		cs_get(cs);
1514 
1515 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1516 
1517 	list_add_tail(&job->cs_node, &cs->job_list);
1518 
1519 	hl_debugfs_add_job(hdev, job);
1520 
1521 	return 0;
1522 }
1523 
1524 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1525 		struct hl_ctx *ctx, struct hl_cs *cs,
1526 		u32 wait_queue_id, u32 collective_engine_id,
1527 		u32 encaps_signal_offset)
1528 {
1529 	struct gaudi_device *gaudi = hdev->asic_specific;
1530 	struct hw_queue_properties *hw_queue_prop;
1531 	u32 queue_id, collective_queue, num_jobs;
1532 	u32 stream, nic_queue, nic_idx = 0;
1533 	bool skip;
1534 	int i, rc = 0;
1535 
1536 	/* Verify wait queue id is configured as master */
1537 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1538 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1539 		dev_err(hdev->dev,
1540 			"Queue %d is not configured as collective master\n",
1541 			wait_queue_id);
1542 		return -EINVAL;
1543 	}
1544 
1545 	/* Verify engine id is supported */
1546 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1547 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1548 		dev_err(hdev->dev,
1549 			"Collective wait does not support engine %u\n",
1550 			collective_engine_id);
1551 		return -EINVAL;
1552 	}
1553 
1554 	stream = wait_queue_id % 4;
1555 
1556 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1557 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1558 	else
1559 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1560 
1561 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1562 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1563 
1564 	/* First job goes to the collective master queue, it will wait for
1565 	 * the collective slave queues to finish execution.
1566 	 * The synchronization is done using two monitors:
1567 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1568 	 * reduction engine (DMA5/TPC7).
1569 	 *
1570 	 * Rest of the jobs goes to the collective slave queues which will
1571 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1572 	 */
1573 	for (i = 0 ; i < num_jobs ; i++) {
1574 		if (i == 0) {
1575 			queue_id = wait_queue_id;
1576 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1577 				HL_COLLECTIVE_MASTER, queue_id,
1578 				wait_queue_id, encaps_signal_offset);
1579 		} else {
1580 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1581 				if (gaudi->hw_cap_initialized &
1582 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1583 					skip = false;
1584 				else
1585 					skip = true;
1586 
1587 				queue_id = nic_queue;
1588 				nic_queue += 4;
1589 				nic_idx++;
1590 
1591 				if (skip)
1592 					continue;
1593 			} else {
1594 				queue_id = collective_queue;
1595 			}
1596 
1597 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1598 				HL_COLLECTIVE_SLAVE, queue_id,
1599 				wait_queue_id, encaps_signal_offset);
1600 		}
1601 
1602 		if (rc)
1603 			return rc;
1604 	}
1605 
1606 	return rc;
1607 }
1608 
1609 static int gaudi_late_init(struct hl_device *hdev)
1610 {
1611 	struct gaudi_device *gaudi = hdev->asic_specific;
1612 	int rc;
1613 
1614 	rc = gaudi->cpucp_info_get(hdev);
1615 	if (rc) {
1616 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1617 		return rc;
1618 	}
1619 
1620 	if ((hdev->card_type == cpucp_card_type_pci) &&
1621 			(hdev->nic_ports_mask & 0x3)) {
1622 		dev_info(hdev->dev,
1623 			"PCI card detected, only 8 ports are enabled\n");
1624 		hdev->nic_ports_mask &= ~0x3;
1625 
1626 		/* Stop and disable unused NIC QMANs */
1627 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1628 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1629 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1630 
1631 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1632 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1633 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1634 
1635 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1636 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1637 
1638 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1639 	}
1640 
1641 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1642 	if (rc) {
1643 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1644 		return rc;
1645 	}
1646 
1647 	/* Scrub both SRAM and DRAM */
1648 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1649 	if (rc)
1650 		goto disable_pci_access;
1651 
1652 	rc = gaudi_fetch_psoc_frequency(hdev);
1653 	if (rc) {
1654 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1655 		goto disable_pci_access;
1656 	}
1657 
1658 	rc = gaudi_mmu_clear_pgt_range(hdev);
1659 	if (rc) {
1660 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1661 		goto disable_pci_access;
1662 	}
1663 
1664 	rc = gaudi_init_tpc_mem(hdev);
1665 	if (rc) {
1666 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1667 		goto disable_pci_access;
1668 	}
1669 
1670 	rc = gaudi_collective_init(hdev);
1671 	if (rc) {
1672 		dev_err(hdev->dev, "Failed to init collective\n");
1673 		goto disable_pci_access;
1674 	}
1675 
1676 	/* We only support a single ASID for the user, so for the sake of optimization, just
1677 	 * initialize the ASID one time during device initialization with the fixed value of 1
1678 	 */
1679 	gaudi_mmu_prepare(hdev, 1);
1680 
1681 	hl_fw_set_pll_profile(hdev);
1682 
1683 	return 0;
1684 
1685 disable_pci_access:
1686 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1687 
1688 	return rc;
1689 }
1690 
/*
 * gaudi_late_fini() - Counterpart of gaudi_late_init().
 * @hdev: Pointer to hl_device structure.
 *
 * Only the hwmon resources are released here.
 */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1695 
1696 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1697 {
1698 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1699 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1700 	int i, j, rc = 0;
1701 
1702 	/*
1703 	 * The device CPU works with 40-bits addresses, while bit 39 must be set
1704 	 * to '1' when accessing the host.
1705 	 * Bits 49:39 of the full host address are saved for a later
1706 	 * configuration of the HW to perform extension to 50 bits.
1707 	 * Because there is a single HW register that holds the extension bits,
1708 	 * these bits must be identical in all allocated range.
1709 	 */
1710 
1711 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1712 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1713 								&dma_addr_arr[i],
1714 								GFP_KERNEL | __GFP_ZERO);
1715 		if (!virt_addr_arr[i]) {
1716 			rc = -ENOMEM;
1717 			goto free_dma_mem_arr;
1718 		}
1719 
1720 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1721 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1722 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1723 			break;
1724 	}
1725 
1726 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1727 		dev_err(hdev->dev,
1728 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1729 		rc = -EFAULT;
1730 		goto free_dma_mem_arr;
1731 	}
1732 
1733 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1734 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1735 	hdev->cpu_pci_msb_addr =
1736 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1737 
1738 	if (!hdev->asic_prop.fw_security_enabled)
1739 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1740 
1741 free_dma_mem_arr:
1742 	for (j = 0 ; j < i ; j++)
1743 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1744 						dma_addr_arr[j]);
1745 
1746 	return rc;
1747 }
1748 
1749 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1750 {
1751 	struct gaudi_device *gaudi = hdev->asic_specific;
1752 	struct gaudi_internal_qman_info *q;
1753 	u32 i;
1754 
1755 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1756 		q = &gaudi->internal_qmans[i];
1757 		if (!q->pq_kernel_addr)
1758 			continue;
1759 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1760 	}
1761 }
1762 
1763 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1764 {
1765 	struct gaudi_device *gaudi = hdev->asic_specific;
1766 	struct gaudi_internal_qman_info *q;
1767 	int rc, i;
1768 
1769 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1770 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1771 			continue;
1772 
1773 		q = &gaudi->internal_qmans[i];
1774 
1775 		switch (i) {
1776 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1777 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1778 			break;
1779 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1780 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1781 			break;
1782 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1783 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1784 			break;
1785 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1786 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1787 			break;
1788 		default:
1789 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1790 			rc = -EINVAL;
1791 			goto free_internal_qmans_pq_mem;
1792 		}
1793 
1794 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1795 								GFP_KERNEL | __GFP_ZERO);
1796 		if (!q->pq_kernel_addr) {
1797 			rc = -ENOMEM;
1798 			goto free_internal_qmans_pq_mem;
1799 		}
1800 	}
1801 
1802 	return 0;
1803 
1804 free_internal_qmans_pq_mem:
1805 	gaudi_free_internal_qmans_pq_mem(hdev);
1806 	return rc;
1807 }
1808 
1809 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1810 {
1811 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1812 	struct pci_mem_region *region;
1813 
1814 	/* CFG */
1815 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1816 	region->region_base = CFG_BASE;
1817 	region->region_size = CFG_SIZE;
1818 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1819 	region->bar_size = CFG_BAR_SIZE;
1820 	region->bar_id = CFG_BAR_ID;
1821 	region->used = 1;
1822 
1823 	/* SRAM */
1824 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1825 	region->region_base = SRAM_BASE_ADDR;
1826 	region->region_size = SRAM_SIZE;
1827 	region->offset_in_bar = 0;
1828 	region->bar_size = SRAM_BAR_SIZE;
1829 	region->bar_id = SRAM_BAR_ID;
1830 	region->used = 1;
1831 
1832 	/* DRAM */
1833 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1834 	region->region_base = DRAM_PHYS_BASE;
1835 	region->region_size = hdev->asic_prop.dram_size;
1836 	region->offset_in_bar = 0;
1837 	region->bar_size = prop->dram_pci_bar_size;
1838 	region->bar_id = HBM_BAR_ID;
1839 	region->used = 1;
1840 
1841 	/* SP SRAM */
1842 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1843 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1844 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1845 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1846 	region->bar_size = CFG_BAR_SIZE;
1847 	region->bar_id = CFG_BAR_ID;
1848 	region->used = 1;
1849 }
1850 
1851 static int gaudi_sw_init(struct hl_device *hdev)
1852 {
1853 	struct gaudi_device *gaudi;
1854 	u32 i, event_id = 0;
1855 	int rc;
1856 
1857 	/* Allocate device structure */
1858 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1859 	if (!gaudi)
1860 		return -ENOMEM;
1861 
1862 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1863 		if (gaudi_irq_map_table[i].valid) {
1864 			if (event_id == GAUDI_EVENT_SIZE) {
1865 				dev_err(hdev->dev,
1866 					"Event array exceeds the limit of %u events\n",
1867 					GAUDI_EVENT_SIZE);
1868 				rc = -EINVAL;
1869 				goto free_gaudi_device;
1870 			}
1871 
1872 			gaudi->events[event_id++] =
1873 					gaudi_irq_map_table[i].fc_id;
1874 		}
1875 	}
1876 
1877 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1878 
1879 	hdev->asic_specific = gaudi;
1880 
1881 	/* Create DMA pool for small allocations */
1882 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1883 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1884 	if (!hdev->dma_pool) {
1885 		dev_err(hdev->dev, "failed to create DMA pool\n");
1886 		rc = -ENOMEM;
1887 		goto free_gaudi_device;
1888 	}
1889 
1890 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1891 	if (rc)
1892 		goto free_dma_pool;
1893 
1894 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1895 	if (!hdev->cpu_accessible_dma_pool) {
1896 		dev_err(hdev->dev,
1897 			"Failed to create CPU accessible DMA pool\n");
1898 		rc = -ENOMEM;
1899 		goto free_cpu_dma_mem;
1900 	}
1901 
1902 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1903 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1904 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1905 	if (rc) {
1906 		dev_err(hdev->dev,
1907 			"Failed to add memory to CPU accessible DMA pool\n");
1908 		rc = -EFAULT;
1909 		goto free_cpu_accessible_dma_pool;
1910 	}
1911 
1912 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1913 	if (rc)
1914 		goto free_cpu_accessible_dma_pool;
1915 
1916 	spin_lock_init(&gaudi->hw_queues_lock);
1917 
1918 	hdev->supports_sync_stream = true;
1919 	hdev->supports_coresight = true;
1920 	hdev->supports_staged_submission = true;
1921 	hdev->supports_wait_for_multi_cs = true;
1922 
1923 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1924 	hdev->stream_master_qid_arr =
1925 				hdev->asic_funcs->get_stream_master_qid_arr();
1926 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1927 
1928 	return 0;
1929 
1930 free_cpu_accessible_dma_pool:
1931 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1932 free_cpu_dma_mem:
1933 	if (!hdev->asic_prop.fw_security_enabled)
1934 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1935 					hdev->cpu_pci_msb_addr);
1936 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1937 					hdev->cpu_accessible_dma_address);
1938 free_dma_pool:
1939 	dma_pool_destroy(hdev->dma_pool);
1940 free_gaudi_device:
1941 	kfree(gaudi);
1942 	return rc;
1943 }
1944 
1945 static int gaudi_sw_fini(struct hl_device *hdev)
1946 {
1947 	struct gaudi_device *gaudi = hdev->asic_specific;
1948 
1949 	gaudi_free_internal_qmans_pq_mem(hdev);
1950 
1951 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1952 
1953 	if (!hdev->asic_prop.fw_security_enabled)
1954 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1955 					hdev->cpu_pci_msb_addr);
1956 
1957 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1958 					hdev->cpu_accessible_dma_address);
1959 
1960 	dma_pool_destroy(hdev->dma_pool);
1961 
1962 	kfree(gaudi);
1963 
1964 	return 0;
1965 }
1966 
1967 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1968 {
1969 	struct hl_device *hdev = arg;
1970 	int i;
1971 
1972 	if (hdev->disabled)
1973 		return IRQ_HANDLED;
1974 
1975 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1976 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1977 
1978 	hl_irq_handler_eq(irq, &hdev->event_queue);
1979 
1980 	return IRQ_HANDLED;
1981 }
1982 
1983 /*
1984  * For backward compatibility, new MSI interrupts should be set after the
1985  * existing CPU and NIC interrupts.
1986  */
1987 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1988 				bool cpu_eq)
1989 {
1990 	int msi_vec;
1991 
1992 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1993 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1994 				GAUDI_EVENT_QUEUE_MSI_IDX);
1995 
1996 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1997 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1998 
1999 	return pci_irq_vector(hdev->pdev, msi_vec);
2000 }
2001 
2002 static int gaudi_enable_msi_single(struct hl_device *hdev)
2003 {
2004 	int rc, irq;
2005 
2006 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2007 
2008 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2009 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2010 			"gaudi single msi", hdev);
2011 	if (rc)
2012 		dev_err(hdev->dev,
2013 			"Failed to request single MSI IRQ\n");
2014 
2015 	return rc;
2016 }
2017 
2018 static int gaudi_enable_msi(struct hl_device *hdev)
2019 {
2020 	struct gaudi_device *gaudi = hdev->asic_specific;
2021 	int rc;
2022 
2023 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2024 		return 0;
2025 
2026 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2027 	if (rc < 0) {
2028 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2029 		return rc;
2030 	}
2031 
2032 	rc = gaudi_enable_msi_single(hdev);
2033 	if (rc)
2034 		goto free_pci_irq_vectors;
2035 
2036 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2037 
2038 	return 0;
2039 
2040 free_pci_irq_vectors:
2041 	pci_free_irq_vectors(hdev->pdev);
2042 	return rc;
2043 }
2044 
2045 static void gaudi_sync_irqs(struct hl_device *hdev)
2046 {
2047 	struct gaudi_device *gaudi = hdev->asic_specific;
2048 
2049 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2050 		return;
2051 
2052 	/* Wait for all pending IRQs to be finished */
2053 	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2054 }
2055 
2056 static void gaudi_disable_msi(struct hl_device *hdev)
2057 {
2058 	struct gaudi_device *gaudi = hdev->asic_specific;
2059 
2060 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2061 		return;
2062 
2063 	gaudi_sync_irqs(hdev);
2064 	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2065 	pci_free_irq_vectors(hdev->pdev);
2066 
2067 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2068 }
2069 
2070 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2071 {
2072 	struct gaudi_device *gaudi = hdev->asic_specific;
2073 
2074 	if (hdev->asic_prop.fw_security_enabled)
2075 		return;
2076 
2077 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2078 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2079 		return;
2080 
2081 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2082 		return;
2083 
2084 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2085 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2086 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2087 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2088 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2089 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2090 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2091 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2092 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2093 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2094 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2095 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2096 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2097 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2098 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2099 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2100 
2101 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2102 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2103 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2104 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2105 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2106 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2107 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2108 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2109 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2110 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2111 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2112 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2113 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2114 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2115 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2116 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2117 
2118 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2119 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2120 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2121 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2122 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2123 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2124 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2125 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2126 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2127 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2128 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2129 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2130 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2131 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2132 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2133 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2134 
2135 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2136 }
2137 
2138 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2139 {
2140 	struct gaudi_device *gaudi = hdev->asic_specific;
2141 
2142 	if (hdev->asic_prop.fw_security_enabled)
2143 		return;
2144 
2145 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2146 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2147 		return;
2148 
2149 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2150 		return;
2151 
2152 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2153 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2154 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2155 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2156 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2157 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2158 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2159 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2160 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2161 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2162 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2163 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2164 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2165 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2166 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2167 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2168 
2169 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2170 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2172 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2173 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2174 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2175 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2176 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2177 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2178 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2179 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2180 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2181 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2182 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2183 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2184 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2185 
2186 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2187 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2188 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2189 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2190 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2191 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2192 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2193 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2194 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2195 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2196 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2197 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2198 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2199 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2200 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2201 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2202 
2203 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2204 }
2205 
2206 static void gaudi_init_e2e(struct hl_device *hdev)
2207 {
2208 	if (hdev->asic_prop.fw_security_enabled)
2209 		return;
2210 
2211 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2212 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2213 		return;
2214 
2215 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2216 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2217 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2218 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2219 
2220 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2221 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2222 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2223 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2224 
2225 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2226 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2227 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2228 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2229 
2230 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2231 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2232 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2233 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2234 
2235 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2236 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2237 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2238 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2239 
2240 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2241 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2242 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2243 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2244 
2245 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2246 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2247 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2248 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2249 
2250 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2251 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2252 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2253 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2254 
2255 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2256 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2257 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2258 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2259 
2260 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2261 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2262 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2263 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2264 
2265 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2266 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2267 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2268 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2269 
2270 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2271 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2272 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2273 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2274 
2275 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2276 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2277 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2278 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2279 
2280 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2281 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2282 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2283 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2284 
2285 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2286 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2287 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2288 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2289 
2290 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2291 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2292 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2293 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2294 
2295 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2296 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2297 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2298 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2299 
2300 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2301 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2302 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2303 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2304 
2305 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2306 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2307 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2308 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2309 
2310 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2311 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2312 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2313 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2314 
2315 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2316 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2317 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2318 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2319 
2320 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2321 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2322 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2323 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2324 
2325 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2326 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2327 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2328 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2329 
2330 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2331 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2332 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2333 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2334 
2335 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2336 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2337 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2338 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2339 
2340 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2341 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2342 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2343 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2344 
2345 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2346 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2347 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2348 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2349 
2350 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2351 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2352 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2353 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2354 
2355 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2356 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2357 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2358 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2359 
2360 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2361 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2362 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2363 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2364 
2365 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2366 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2367 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2368 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2369 
2370 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2371 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2372 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2373 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2374 
2375 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2376 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2377 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2378 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2379 
2380 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2381 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2382 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2383 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2384 
2385 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2386 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2387 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2388 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2389 
2390 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2391 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2392 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2393 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2394 
2395 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2396 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2397 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2398 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2399 
2400 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2401 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2402 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2403 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2404 
2405 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2406 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2407 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2408 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2409 
2410 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2411 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2412 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2413 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2414 
2415 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2416 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2417 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2418 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2419 
2420 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2421 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2422 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2423 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2424 
2425 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2426 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2427 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2428 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2429 
2430 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2431 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2432 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2433 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2434 
2435 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2436 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2437 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2438 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2439 
2440 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2441 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2442 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2443 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2444 
2445 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2446 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2447 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2448 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2449 
2450 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2451 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2452 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2453 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2454 }
2455 
2456 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2457 {
2458 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2459 
2460 	if (hdev->asic_prop.fw_security_enabled)
2461 		return;
2462 
2463 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2464 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2465 		return;
2466 
2467 	hbm0_wr = 0x33333333;
2468 	hbm0_rd = 0x77777777;
2469 	hbm1_wr = 0x55555555;
2470 	hbm1_rd = 0xDDDDDDDD;
2471 
2472 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2473 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2474 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2475 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2476 
2477 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2478 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2479 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2480 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2481 
2482 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2483 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2484 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2485 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2486 
2487 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2488 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2489 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2490 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2491 
2492 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2493 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2494 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2495 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2496 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2497 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2498 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2499 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2500 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2501 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2502 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2503 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2504 
2505 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2506 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2507 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2508 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2509 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2510 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2511 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2512 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2513 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2514 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2515 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2516 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2517 }
2518 
2519 static void gaudi_init_golden_registers(struct hl_device *hdev)
2520 {
2521 	u32 tpc_offset;
2522 	int tpc_id, i;
2523 
2524 	gaudi_init_e2e(hdev);
2525 	gaudi_init_hbm_cred(hdev);
2526 
2527 	for (tpc_id = 0, tpc_offset = 0;
2528 				tpc_id < TPC_NUMBER_OF_ENGINES;
2529 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2530 		/* Mask all arithmetic interrupts from TPC */
2531 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2532 		/* Set 16 cache lines */
2533 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2534 				ICACHE_FETCH_LINE_NUM, 2);
2535 	}
2536 
2537 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2538 	for (i = 0 ; i < 128 ; i += 8)
2539 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2540 
2541 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2544 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2545 }
2546 
2547 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2548 					int qman_id, dma_addr_t qman_pq_addr)
2549 {
2550 	struct cpu_dyn_regs *dyn_regs =
2551 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2552 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2553 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2554 	u32 q_off, dma_qm_offset;
2555 	u32 dma_qm_err_cfg, irq_handler_offset;
2556 
2557 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2558 
2559 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2560 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2561 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2562 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2563 	so_base_en_lo = lower_32_bits(CFG_BASE +
2564 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2565 	so_base_en_hi = upper_32_bits(CFG_BASE +
2566 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2567 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2568 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2569 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2570 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2571 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2572 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2573 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2574 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2575 
2576 	q_off = dma_qm_offset + qman_id * 4;
2577 
2578 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2579 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2580 
2581 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2582 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2583 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2584 
2585 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2586 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2587 							QMAN_LDMA_SRC_OFFSET);
2588 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2589 							QMAN_LDMA_DST_OFFSET);
2590 
2591 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2592 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2593 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2594 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2595 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2596 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2597 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2598 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2599 
2600 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2601 
2602 	/* The following configuration is needed only once per QMAN */
2603 	if (qman_id == 0) {
2604 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2605 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2606 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2607 
2608 		/* Configure RAZWI IRQ */
2609 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2610 		if (hdev->stop_on_err)
2611 			dma_qm_err_cfg |=
2612 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2613 
2614 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2615 
2616 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2617 			lower_32_bits(CFG_BASE + irq_handler_offset));
2618 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2619 			upper_32_bits(CFG_BASE + irq_handler_offset));
2620 
2621 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2622 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2623 									dma_id);
2624 
2625 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2626 				QM_ARB_ERR_MSG_EN_MASK);
2627 
2628 		/* Set timeout to maximum */
2629 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2630 
2631 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2632 				QMAN_EXTERNAL_MAKE_TRUSTED);
2633 
2634 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2635 	}
2636 }
2637 
2638 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2639 {
2640 	struct cpu_dyn_regs *dyn_regs =
2641 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2642 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2643 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2644 	u32 irq_handler_offset;
2645 
2646 	/* Set to maximum possible according to physical size */
2647 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2648 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2649 
2650 	/* WA for H/W bug H3-2116 */
2651 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2652 
2653 	/* STOP_ON bit implies no completion to operation in case of RAZWI */
2654 	if (hdev->stop_on_err)
2655 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2656 
2657 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2658 
2659 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2660 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2661 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2662 
2663 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2664 		lower_32_bits(CFG_BASE + irq_handler_offset));
2665 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2666 		upper_32_bits(CFG_BASE + irq_handler_offset));
2667 
2668 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2669 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2670 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2671 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2672 	/* If the channel is secured, it should be in MMU bypass mode */
2673 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2674 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2675 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2676 }
2677 
2678 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2679 				u32 enable_mask)
2680 {
2681 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2682 
2683 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2684 }
2685 
2686 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2687 {
2688 	struct gaudi_device *gaudi = hdev->asic_specific;
2689 	struct hl_hw_queue *q;
2690 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2691 
2692 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2693 		return;
2694 
2695 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2696 		dma_id = gaudi_dma_assignment[i];
2697 		/*
2698 		 * For queues after the CPU Q need to add 1 to get the correct
2699 		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2700 		 * order to get the correct MSI register.
2701 		 */
2702 		if (dma_id > 1) {
2703 			cpu_skip = 1;
2704 			nic_skip = NIC_NUMBER_OF_ENGINES;
2705 		} else {
2706 			cpu_skip = 0;
2707 			nic_skip = 0;
2708 		}
2709 
2710 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2711 			q_idx = 4 * dma_id + j + cpu_skip;
2712 			q = &hdev->kernel_queues[q_idx];
2713 			q->cq_id = cq_id++;
2714 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2715 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2716 						q->bus_address);
2717 		}
2718 
2719 		gaudi_init_dma_core(hdev, dma_id);
2720 
2721 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2722 	}
2723 
2724 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2725 }
2726 
2727 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2728 					int qman_id, u64 qman_base_addr)
2729 {
2730 	struct cpu_dyn_regs *dyn_regs =
2731 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2732 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2733 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2734 	u32 dma_qm_err_cfg, irq_handler_offset;
2735 	u32 q_off, dma_qm_offset;
2736 
2737 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2738 
2739 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2740 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2742 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2743 	so_base_en_lo = lower_32_bits(CFG_BASE +
2744 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745 	so_base_en_hi = upper_32_bits(CFG_BASE +
2746 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2747 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2748 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2750 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2751 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2752 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2754 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2755 
2756 	q_off = dma_qm_offset + qman_id * 4;
2757 
2758 	if (qman_id < 4) {
2759 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2760 					lower_32_bits(qman_base_addr));
2761 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2762 					upper_32_bits(qman_base_addr));
2763 
2764 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2765 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2766 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2767 
2768 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2769 							QMAN_CPDMA_SIZE_OFFSET);
2770 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2771 							QMAN_CPDMA_SRC_OFFSET);
2772 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2773 							QMAN_CPDMA_DST_OFFSET);
2774 	} else {
2775 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2776 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2777 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2778 
2779 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2780 							QMAN_LDMA_SIZE_OFFSET);
2781 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2782 							QMAN_LDMA_SRC_OFFSET);
2783 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2784 							QMAN_LDMA_DST_OFFSET);
2785 
2786 		/* Configure RAZWI IRQ */
2787 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2788 		if (hdev->stop_on_err)
2789 			dma_qm_err_cfg |=
2790 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2791 
2792 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2793 
2794 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2795 			lower_32_bits(CFG_BASE + irq_handler_offset));
2796 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2797 			upper_32_bits(CFG_BASE + irq_handler_offset));
2798 
2799 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2800 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2801 									dma_id);
2802 
2803 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2804 				QM_ARB_ERR_MSG_EN_MASK);
2805 
2806 		/* Set timeout to maximum */
2807 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2808 
2809 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2810 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2811 				QMAN_INTERNAL_MAKE_TRUSTED);
2812 	}
2813 
2814 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2815 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2816 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2817 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2818 
2819 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2820 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2821 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2822 				mtr_base_ws_lo);
2823 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2824 				mtr_base_ws_hi);
2825 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2826 				so_base_ws_lo);
2827 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2828 				so_base_ws_hi);
2829 	}
2830 }
2831 
2832 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2833 {
2834 	struct gaudi_device *gaudi = hdev->asic_specific;
2835 	struct gaudi_internal_qman_info *q;
2836 	u64 qman_base_addr;
2837 	int i, j, dma_id, internal_q_index;
2838 
2839 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2840 		return;
2841 
2842 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2843 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2844 
2845 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2846 			 /*
2847 			  * Add the CPU queue in order to get the correct queue
2848 			  * number as all internal queue are placed after it
2849 			  */
2850 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2851 
2852 			q = &gaudi->internal_qmans[internal_q_index];
2853 			qman_base_addr = (u64) q->pq_dma_addr;
2854 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2855 						qman_base_addr);
2856 		}
2857 
2858 		/* Initializing lower CP for HBM DMA QMAN */
2859 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2860 
2861 		gaudi_init_dma_core(hdev, dma_id);
2862 
2863 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2864 	}
2865 
2866 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2867 }
2868 
2869 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2870 					int qman_id, u64 qman_base_addr)
2871 {
2872 	struct cpu_dyn_regs *dyn_regs =
2873 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2874 	u32 mtr_base_lo, mtr_base_hi;
2875 	u32 so_base_lo, so_base_hi;
2876 	u32 irq_handler_offset;
2877 	u32 q_off, mme_id;
2878 	u32 mme_qm_err_cfg;
2879 
2880 	mtr_base_lo = lower_32_bits(CFG_BASE +
2881 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2882 	mtr_base_hi = upper_32_bits(CFG_BASE +
2883 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2884 	so_base_lo = lower_32_bits(CFG_BASE +
2885 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2886 	so_base_hi = upper_32_bits(CFG_BASE +
2887 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2888 
2889 	q_off = mme_offset + qman_id * 4;
2890 
2891 	if (qman_id < 4) {
2892 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2893 					lower_32_bits(qman_base_addr));
2894 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2895 					upper_32_bits(qman_base_addr));
2896 
2897 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2898 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2899 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2900 
2901 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2902 							QMAN_CPDMA_SIZE_OFFSET);
2903 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2904 							QMAN_CPDMA_SRC_OFFSET);
2905 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2906 							QMAN_CPDMA_DST_OFFSET);
2907 	} else {
2908 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2909 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2910 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2911 
2912 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2913 							QMAN_LDMA_SIZE_OFFSET);
2914 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2915 							QMAN_LDMA_SRC_OFFSET);
2916 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2917 							QMAN_LDMA_DST_OFFSET);
2918 
2919 		/* Configure RAZWI IRQ */
2920 		mme_id = mme_offset /
2921 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2922 
2923 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2924 		if (hdev->stop_on_err)
2925 			mme_qm_err_cfg |=
2926 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2927 
2928 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2929 
2930 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2931 			lower_32_bits(CFG_BASE + irq_handler_offset));
2932 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2933 			upper_32_bits(CFG_BASE + irq_handler_offset));
2934 
2935 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2936 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2937 									mme_id);
2938 
2939 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2940 				QM_ARB_ERR_MSG_EN_MASK);
2941 
2942 		/* Set timeout to maximum */
2943 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2944 
2945 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2946 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2947 				QMAN_INTERNAL_MAKE_TRUSTED);
2948 	}
2949 
2950 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2951 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2952 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2953 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2954 }
2955 
2956 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2957 {
2958 	struct gaudi_device *gaudi = hdev->asic_specific;
2959 	struct gaudi_internal_qman_info *q;
2960 	u64 qman_base_addr;
2961 	u32 mme_offset;
2962 	int i, internal_q_index;
2963 
2964 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2965 		return;
2966 
2967 	/*
2968 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2969 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2970 	 */
2971 
2972 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2973 
2974 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2975 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2976 		q = &gaudi->internal_qmans[internal_q_index];
2977 		qman_base_addr = (u64) q->pq_dma_addr;
2978 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2979 					qman_base_addr);
2980 		if (i == 3)
2981 			mme_offset = 0;
2982 	}
2983 
2984 	/* Initializing lower CP for MME QMANs */
2985 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2986 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2987 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2988 
2989 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2990 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2991 
2992 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2993 }
2994 
2995 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2996 				int qman_id, u64 qman_base_addr)
2997 {
2998 	struct cpu_dyn_regs *dyn_regs =
2999 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3000 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3001 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3002 	u32 tpc_qm_err_cfg, irq_handler_offset;
3003 	u32 q_off, tpc_id;
3004 
3005 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3006 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3008 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3009 	so_base_en_lo = lower_32_bits(CFG_BASE +
3010 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011 	so_base_en_hi = upper_32_bits(CFG_BASE +
3012 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3013 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3014 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3016 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3017 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3018 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3020 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3021 
3022 	q_off = tpc_offset + qman_id * 4;
3023 
3024 	tpc_id = tpc_offset /
3025 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3026 
3027 	if (qman_id < 4) {
3028 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3029 					lower_32_bits(qman_base_addr));
3030 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3031 					upper_32_bits(qman_base_addr));
3032 
3033 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3034 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3035 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3036 
3037 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3038 							QMAN_CPDMA_SIZE_OFFSET);
3039 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3040 							QMAN_CPDMA_SRC_OFFSET);
3041 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3042 							QMAN_CPDMA_DST_OFFSET);
3043 	} else {
3044 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3045 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3046 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3047 
3048 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3049 							QMAN_LDMA_SIZE_OFFSET);
3050 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3051 							QMAN_LDMA_SRC_OFFSET);
3052 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3053 							QMAN_LDMA_DST_OFFSET);
3054 
3055 		/* Configure RAZWI IRQ */
3056 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3057 		if (hdev->stop_on_err)
3058 			tpc_qm_err_cfg |=
3059 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3060 
3061 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3062 
3063 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3064 			lower_32_bits(CFG_BASE + irq_handler_offset));
3065 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3066 			upper_32_bits(CFG_BASE + irq_handler_offset));
3067 
3068 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3069 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3070 									tpc_id);
3071 
3072 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3073 				QM_ARB_ERR_MSG_EN_MASK);
3074 
3075 		/* Set timeout to maximum */
3076 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3077 
3078 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3079 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3080 				QMAN_INTERNAL_MAKE_TRUSTED);
3081 	}
3082 
3083 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3084 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3085 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3086 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3087 
3088 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3089 	if (tpc_id == 6) {
3090 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3091 				mtr_base_ws_lo);
3092 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3093 				mtr_base_ws_hi);
3094 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3095 				so_base_ws_lo);
3096 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3097 				so_base_ws_hi);
3098 	}
3099 }
3100 
3101 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3102 {
3103 	struct gaudi_device *gaudi = hdev->asic_specific;
3104 	struct gaudi_internal_qman_info *q;
3105 	u64 qman_base_addr;
3106 	u32 so_base_hi, tpc_offset = 0;
3107 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3108 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3109 	int i, tpc_id, internal_q_index;
3110 
3111 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3112 		return;
3113 
3114 	so_base_hi = upper_32_bits(CFG_BASE +
3115 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3116 
3117 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3118 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3119 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3120 						tpc_id * QMAN_STREAMS + i;
3121 			q = &gaudi->internal_qmans[internal_q_index];
3122 			qman_base_addr = (u64) q->pq_dma_addr;
3123 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3124 						qman_base_addr);
3125 
3126 			if (i == 3) {
3127 				/* Initializing lower CP for TPC QMAN */
3128 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3129 
3130 				/* Enable the QMAN and TPC channel */
3131 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3132 						QMAN_TPC_ENABLE);
3133 			}
3134 		}
3135 
3136 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3137 				so_base_hi);
3138 
3139 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3140 
3141 		gaudi->hw_cap_initialized |=
3142 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3143 	}
3144 }
3145 
3146 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3147 				int qman_id, u64 qman_base_addr, int nic_id)
3148 {
3149 	struct cpu_dyn_regs *dyn_regs =
3150 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3151 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3152 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3153 	u32 nic_qm_err_cfg, irq_handler_offset;
3154 	u32 q_off;
3155 
3156 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3157 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3158 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3159 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3160 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3161 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3162 	so_base_en_hi = upper_32_bits(CFG_BASE +
3163 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3164 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3165 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3166 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3167 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3168 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3169 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3170 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3171 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3172 
3173 	q_off = nic_offset + qman_id * 4;
3174 
3175 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3176 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3177 
3178 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3179 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3180 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3181 
3182 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3183 							QMAN_LDMA_SIZE_OFFSET);
3184 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3185 							QMAN_LDMA_SRC_OFFSET);
3186 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3187 							QMAN_LDMA_DST_OFFSET);
3188 
3189 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3190 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3191 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3192 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3193 
3194 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3195 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3196 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3197 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3198 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3199 
3200 	if (qman_id == 0) {
3201 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3202 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3203 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3204 
3205 		/* Configure RAZWI IRQ */
3206 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3207 		if (hdev->stop_on_err)
3208 			nic_qm_err_cfg |=
3209 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3210 
3211 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3212 
3213 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3214 			lower_32_bits(CFG_BASE + irq_handler_offset));
3215 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3216 			upper_32_bits(CFG_BASE + irq_handler_offset));
3217 
3218 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3219 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3220 									nic_id);
3221 
3222 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3223 				QM_ARB_ERR_MSG_EN_MASK);
3224 
3225 		/* Set timeout to maximum */
3226 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3227 
3228 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3229 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3230 				QMAN_INTERNAL_MAKE_TRUSTED);
3231 	}
3232 }
3233 
3234 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3235 {
3236 	struct gaudi_device *gaudi = hdev->asic_specific;
3237 	struct gaudi_internal_qman_info *q;
3238 	u64 qman_base_addr;
3239 	u32 nic_offset = 0;
3240 	u32 nic_delta_between_qmans =
3241 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242 	u32 nic_delta_between_nics =
3243 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3244 	int i, nic_id, internal_q_index;
3245 
3246 	if (!hdev->nic_ports_mask)
3247 		return;
3248 
3249 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3250 		return;
3251 
3252 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3253 
3254 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3255 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3256 			nic_offset += nic_delta_between_qmans;
3257 			if (nic_id & 1) {
3258 				nic_offset -= (nic_delta_between_qmans * 2);
3259 				nic_offset += nic_delta_between_nics;
3260 			}
3261 			continue;
3262 		}
3263 
3264 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3265 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3266 						nic_id * QMAN_STREAMS + i;
3267 			q = &gaudi->internal_qmans[internal_q_index];
3268 			qman_base_addr = (u64) q->pq_dma_addr;
3269 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3270 						qman_base_addr, nic_id);
3271 		}
3272 
3273 		/* Enable the QMAN */
3274 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3275 
3276 		nic_offset += nic_delta_between_qmans;
3277 		if (nic_id & 1) {
3278 			nic_offset -= (nic_delta_between_qmans * 2);
3279 			nic_offset += nic_delta_between_nics;
3280 		}
3281 
3282 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3283 	}
3284 }
3285 
3286 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3287 {
3288 	struct gaudi_device *gaudi = hdev->asic_specific;
3289 
3290 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3291 		return;
3292 
3293 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3294 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3295 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3296 }
3297 
3298 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3299 {
3300 	struct gaudi_device *gaudi = hdev->asic_specific;
3301 
3302 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3303 		return;
3304 
3305 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3306 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3307 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3308 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3309 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3310 }
3311 
3312 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3313 {
3314 	struct gaudi_device *gaudi = hdev->asic_specific;
3315 
3316 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3317 		return;
3318 
3319 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3320 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3321 }
3322 
3323 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3324 {
3325 	struct gaudi_device *gaudi = hdev->asic_specific;
3326 	u32 tpc_offset = 0;
3327 	int tpc_id;
3328 
3329 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3330 		return;
3331 
3332 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3333 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3334 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3335 	}
3336 }
3337 
3338 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3339 {
3340 	struct gaudi_device *gaudi = hdev->asic_specific;
3341 	u32 nic_mask, nic_offset = 0;
3342 	u32 nic_delta_between_qmans =
3343 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344 	u32 nic_delta_between_nics =
3345 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3346 	int nic_id;
3347 
3348 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3349 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3350 
3351 		if (gaudi->hw_cap_initialized & nic_mask)
3352 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3353 
3354 		nic_offset += nic_delta_between_qmans;
3355 		if (nic_id & 1) {
3356 			nic_offset -= (nic_delta_between_qmans * 2);
3357 			nic_offset += nic_delta_between_nics;
3358 		}
3359 	}
3360 }
3361 
3362 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3363 {
3364 	struct gaudi_device *gaudi = hdev->asic_specific;
3365 
3366 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3367 		return;
3368 
3369 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3370 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373 }
3374 
3375 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3376 {
3377 	struct gaudi_device *gaudi = hdev->asic_specific;
3378 
3379 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3380 		return;
3381 
3382 	/* Stop CPs of HBM DMA QMANs */
3383 
3384 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389 }
3390 
3391 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3392 {
3393 	struct gaudi_device *gaudi = hdev->asic_specific;
3394 
3395 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3396 		return;
3397 
3398 	/* Stop CPs of MME QMANs */
3399 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3400 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401 }
3402 
3403 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3404 {
3405 	struct gaudi_device *gaudi = hdev->asic_specific;
3406 
3407 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3408 		return;
3409 
3410 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418 }
3419 
3420 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3421 {
3422 	struct gaudi_device *gaudi = hdev->asic_specific;
3423 
3424 	/* Stop upper CPs of QMANs */
3425 
3426 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3427 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3428 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3429 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3430 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3431 
3432 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3433 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3434 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3435 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3436 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3437 
3438 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3439 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3440 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3441 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3442 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3443 
3444 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3445 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3446 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3447 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3448 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3449 
3450 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3451 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3452 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3453 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3454 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3455 
3456 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3457 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3458 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3459 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3460 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3461 
3462 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3463 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3464 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3465 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3466 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3467 
3468 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3469 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3470 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3471 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3472 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3473 
3474 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3475 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3476 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3477 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3478 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3479 
3480 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3481 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3482 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3483 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3484 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3485 }
3486 
3487 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3488 {
3489 	struct gaudi_device *gaudi = hdev->asic_specific;
3490 
3491 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3492 		return;
3493 
3494 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497 }
3498 
3499 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3500 {
3501 	struct gaudi_device *gaudi = hdev->asic_specific;
3502 
3503 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3504 		return;
3505 
3506 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511 }
3512 
3513 static void gaudi_mme_stall(struct hl_device *hdev)
3514 {
3515 	struct gaudi_device *gaudi = hdev->asic_specific;
3516 
3517 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3518 		return;
3519 
3520 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3521 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3522 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3524 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3526 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3528 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3530 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3532 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3534 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3536 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537 }
3538 
3539 static void gaudi_tpc_stall(struct hl_device *hdev)
3540 {
3541 	struct gaudi_device *gaudi = hdev->asic_specific;
3542 
3543 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3544 		return;
3545 
3546 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554 }
3555 
3556 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3557 {
3558 	u32 qman_offset;
3559 	int i;
3560 
3561 	if (hdev->asic_prop.fw_security_enabled)
3562 		return;
3563 
3564 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3565 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3566 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3567 
3568 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3569 	}
3570 
3571 	WREG32(mmMME0_QM_CGM_CFG, 0);
3572 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3573 	WREG32(mmMME2_QM_CGM_CFG, 0);
3574 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3575 
3576 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3577 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3578 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3579 
3580 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3581 	}
3582 }
3583 
3584 static void gaudi_enable_timestamp(struct hl_device *hdev)
3585 {
3586 	/* Disable the timestamp counter */
3587 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3588 
3589 	/* Zero the lower/upper parts of the 64-bit counter */
3590 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3591 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3592 
3593 	/* Enable the counter */
3594 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3595 }
3596 
3597 static void gaudi_disable_timestamp(struct hl_device *hdev)
3598 {
3599 	/* Disable the timestamp counter */
3600 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3601 }
3602 
3603 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3604 {
3605 	u32 wait_timeout_ms;
3606 
3607 	if (hdev->pldm)
3608 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3609 	else
3610 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3611 
3612 	if (fw_reset)
3613 		goto skip_engines;
3614 
3615 	gaudi_stop_nic_qmans(hdev);
3616 	gaudi_stop_mme_qmans(hdev);
3617 	gaudi_stop_tpc_qmans(hdev);
3618 	gaudi_stop_hbm_dma_qmans(hdev);
3619 	gaudi_stop_pci_dma_qmans(hdev);
3620 
3621 	msleep(wait_timeout_ms);
3622 
3623 	gaudi_pci_dma_stall(hdev);
3624 	gaudi_hbm_dma_stall(hdev);
3625 	gaudi_tpc_stall(hdev);
3626 	gaudi_mme_stall(hdev);
3627 
3628 	msleep(wait_timeout_ms);
3629 
3630 	gaudi_disable_nic_qmans(hdev);
3631 	gaudi_disable_mme_qmans(hdev);
3632 	gaudi_disable_tpc_qmans(hdev);
3633 	gaudi_disable_hbm_dma_qmans(hdev);
3634 	gaudi_disable_pci_dma_qmans(hdev);
3635 
3636 	gaudi_disable_timestamp(hdev);
3637 
3638 skip_engines:
3639 	gaudi_disable_msi(hdev);
3640 }
3641 
3642 static int gaudi_mmu_init(struct hl_device *hdev)
3643 {
3644 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3645 	struct gaudi_device *gaudi = hdev->asic_specific;
3646 	u64 hop0_addr;
3647 	int rc, i;
3648 
3649 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3650 		return 0;
3651 
3652 	for (i = 0 ; i < prop->max_asid ; i++) {
3653 		hop0_addr = prop->mmu_pgt_addr +
3654 				(i * prop->dmmu.hop_table_size);
3655 
3656 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3657 		if (rc) {
3658 			dev_err(hdev->dev,
3659 				"failed to set hop0 addr for asid %d\n", i);
3660 			return rc;
3661 		}
3662 	}
3663 
3664 	/* init MMU cache manage page */
3665 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3666 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3667 
3668 	/* mem cache invalidation */
3669 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3670 
3671 	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3672 	if (rc)
3673 		return rc;
3674 
3675 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3676 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3677 
3678 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3679 
3680 	/*
3681 	 * The H/W expects the first PI after init to be 1. After wraparound
3682 	 * we'll write 0.
3683 	 */
3684 	gaudi->mmu_cache_inv_pi = 1;
3685 
3686 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3687 
3688 	return 0;
3689 }
3690 
3691 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3692 {
3693 	void __iomem *dst;
3694 
3695 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3696 
3697 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3698 }
3699 
3700 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3701 {
3702 	void __iomem *dst;
3703 
3704 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3705 
3706 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3707 }
3708 
3709 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3710 {
3711 	struct dynamic_fw_load_mgr *dynamic_loader;
3712 	struct cpu_dyn_regs *dyn_regs;
3713 
3714 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3715 
3716 	/*
3717 	 * here we update initial values for few specific dynamic regs (as
3718 	 * before reading the first descriptor from FW those value has to be
3719 	 * hard-coded) in later stages of the protocol those values will be
3720 	 * updated automatically by reading the FW descriptor so data there
3721 	 * will always be up-to-date
3722 	 */
3723 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3724 	dyn_regs->kmd_msg_to_cpu =
3725 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3726 	dyn_regs->cpu_cmd_status_to_host =
3727 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3728 
3729 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3730 }
3731 
3732 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3733 {
3734 	struct static_fw_load_mgr *static_loader;
3735 
3736 	static_loader = &hdev->fw_loader.static_loader;
3737 
3738 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3739 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3740 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3741 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3742 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3743 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3744 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3745 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3746 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3747 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3748 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3749 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3750 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3751 			GAUDI_PLDM_RESET_WAIT_MSEC :
3752 			GAUDI_CPU_RESET_WAIT_MSEC;
3753 }
3754 
3755 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3756 {
3757 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3758 
3759 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3760 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3761 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3762 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3763 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3764 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3765 }
3766 
3767 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3768 {
3769 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3770 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3771 
3772 	/* fill common fields */
3773 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3774 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3775 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3776 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3777 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3778 	fw_loader->skip_bmc = !hdev->bmc_enable;
3779 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3780 	fw_loader->dram_bar_id = HBM_BAR_ID;
3781 
3782 	if (prop->dynamic_fw_load)
3783 		gaudi_init_dynamic_firmware_loader(hdev);
3784 	else
3785 		gaudi_init_static_firmware_loader(hdev);
3786 }
3787 
3788 static int gaudi_init_cpu(struct hl_device *hdev)
3789 {
3790 	struct gaudi_device *gaudi = hdev->asic_specific;
3791 	int rc;
3792 
3793 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3794 		return 0;
3795 
3796 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3797 		return 0;
3798 
3799 	/*
3800 	 * The device CPU works with 40 bits addresses.
3801 	 * This register sets the extension to 50 bits.
3802 	 */
3803 	if (!hdev->asic_prop.fw_security_enabled)
3804 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3805 
3806 	rc = hl_fw_init_cpu(hdev);
3807 
3808 	if (rc)
3809 		return rc;
3810 
3811 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3812 
3813 	return 0;
3814 }
3815 
3816 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3817 {
3818 	struct cpu_dyn_regs *dyn_regs =
3819 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3820 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3821 	struct gaudi_device *gaudi = hdev->asic_specific;
3822 	u32 status, irq_handler_offset;
3823 	struct hl_eq *eq;
3824 	struct hl_hw_queue *cpu_pq =
3825 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3826 	int err;
3827 
3828 	if (!hdev->cpu_queues_enable)
3829 		return 0;
3830 
3831 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3832 		return 0;
3833 
3834 	eq = &hdev->event_queue;
3835 
3836 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3837 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3838 
3839 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3840 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3841 
3842 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3843 			lower_32_bits(hdev->cpu_accessible_dma_address));
3844 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3845 			upper_32_bits(hdev->cpu_accessible_dma_address));
3846 
3847 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3848 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3849 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3850 
3851 	/* Used for EQ CI */
3852 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3853 
3854 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3855 
3856 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3857 
3858 	irq_handler_offset = prop->gic_interrupts_enable ?
3859 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3860 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3861 
3862 	WREG32(irq_handler_offset,
3863 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3864 
3865 	err = hl_poll_timeout(
3866 		hdev,
3867 		mmCPU_IF_QUEUE_INIT,
3868 		status,
3869 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3870 		1000,
3871 		cpu_timeout);
3872 
3873 	if (err) {
3874 		dev_err(hdev->dev,
3875 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3876 		return -EIO;
3877 	}
3878 
3879 	/* update FW application security bits */
3880 	if (prop->fw_cpu_boot_dev_sts0_valid)
3881 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3882 	if (prop->fw_cpu_boot_dev_sts1_valid)
3883 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3884 
3885 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3886 	return 0;
3887 }
3888 
3889 static void gaudi_pre_hw_init(struct hl_device *hdev)
3890 {
3891 	/* Perform read from the device to make sure device is up */
3892 	RREG32(mmHW_STATE);
3893 
3894 	if (!hdev->asic_prop.fw_security_enabled) {
3895 		/* Set the access through PCI bars (Linux driver only) as
3896 		 * secured
3897 		 */
3898 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3899 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3900 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3901 
3902 		/* Perform read to flush the waiting writes to ensure
3903 		 * configuration was set in the device
3904 		 */
3905 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3906 	}
3907 
3908 	/*
3909 	 * Let's mark in the H/W that we have reached this point. We check
3910 	 * this value in the reset_before_init function to understand whether
3911 	 * we need to reset the chip before doing H/W init. This register is
3912 	 * cleared by the H/W upon H/W reset
3913 	 */
3914 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3915 }
3916 
3917 static int gaudi_hw_init(struct hl_device *hdev)
3918 {
3919 	struct gaudi_device *gaudi = hdev->asic_specific;
3920 	int rc;
3921 
3922 	gaudi_pre_hw_init(hdev);
3923 
3924 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3925 	 * So we set it here and if anyone tries to move it later to
3926 	 * a different address, there will be an error
3927 	 */
3928 	if (hdev->asic_prop.iatu_done_by_fw)
3929 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3930 
3931 	/*
3932 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3933 	 * base address of dram
3934 	 */
3935 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3936 		dev_err(hdev->dev,
3937 			"failed to map HBM bar to DRAM base address\n");
3938 		return -EIO;
3939 	}
3940 
3941 	rc = gaudi_init_cpu(hdev);
3942 	if (rc) {
3943 		dev_err(hdev->dev, "failed to initialize CPU\n");
3944 		return rc;
3945 	}
3946 
3947 	/* In case the clock gating was enabled in preboot we need to disable
3948 	 * it here before touching the MME/TPC registers.
3949 	 */
3950 	gaudi_disable_clock_gating(hdev);
3951 
3952 	/* SRAM scrambler must be initialized after CPU is running from HBM */
3953 	gaudi_init_scrambler_sram(hdev);
3954 
3955 	/* This is here just in case we are working without CPU */
3956 	gaudi_init_scrambler_hbm(hdev);
3957 
3958 	gaudi_init_golden_registers(hdev);
3959 
3960 	rc = gaudi_mmu_init(hdev);
3961 	if (rc)
3962 		return rc;
3963 
3964 	gaudi_init_security(hdev);
3965 
3966 	gaudi_init_pci_dma_qmans(hdev);
3967 
3968 	gaudi_init_hbm_dma_qmans(hdev);
3969 
3970 	gaudi_init_mme_qmans(hdev);
3971 
3972 	gaudi_init_tpc_qmans(hdev);
3973 
3974 	gaudi_init_nic_qmans(hdev);
3975 
3976 	gaudi_enable_timestamp(hdev);
3977 
3978 	/* MSI must be enabled before CPU queues and NIC are initialized */
3979 	rc = gaudi_enable_msi(hdev);
3980 	if (rc)
3981 		goto disable_queues;
3982 
3983 	/* must be called after MSI was enabled */
3984 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3985 	if (rc) {
3986 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3987 			rc);
3988 		goto disable_msi;
3989 	}
3990 
3991 	/* Perform read from the device to flush all configuration */
3992 	RREG32(mmHW_STATE);
3993 
3994 	return 0;
3995 
3996 disable_msi:
3997 	gaudi_disable_msi(hdev);
3998 disable_queues:
3999 	gaudi_disable_mme_qmans(hdev);
4000 	gaudi_disable_pci_dma_qmans(hdev);
4001 
4002 	return rc;
4003 }
4004 
4005 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4006 {
4007 	struct cpu_dyn_regs *dyn_regs =
4008 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4009 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4010 	struct gaudi_device *gaudi = hdev->asic_specific;
4011 	bool driver_performs_reset;
4012 
4013 	if (!hard_reset) {
4014 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4015 		return 0;
4016 	}
4017 
4018 	if (hdev->pldm) {
4019 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4020 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4021 	} else {
4022 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4023 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4024 	}
4025 
4026 	if (fw_reset) {
4027 		dev_dbg(hdev->dev,
4028 			"Firmware performs HARD reset, going to wait %dms\n",
4029 			reset_timeout_ms);
4030 
4031 		goto skip_reset;
4032 	}
4033 
4034 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4035 					!hdev->asic_prop.hard_reset_done_by_fw);
4036 
4037 	/* Set device to handle FLR by H/W as we will put the device CPU to
4038 	 * halt mode
4039 	 */
4040 	if (driver_performs_reset)
4041 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4042 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4043 
4044 	/* If linux is loaded in the device CPU we need to communicate with it
4045 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4046 	 * registers in case of old F/Ws
4047 	 */
4048 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4049 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4050 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4051 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4052 
4053 		WREG32(irq_handler_offset,
4054 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4055 
4056 		/* This is a hail-mary attempt to revive the card in the small chance that the
4057 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4058 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4059 		 * reset as if Linux wasn't loaded.
4060 		 *
4061 		 * We do it only if the reset cause was HB, because that would be the indication
4062 		 * of such an event.
4063 		 *
4064 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4065 		 * damage.
4066 		 */
4067 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4068 			if (hdev->asic_prop.hard_reset_done_by_fw)
4069 				hl_fw_ask_hard_reset_without_linux(hdev);
4070 			else
4071 				hl_fw_ask_halt_machine_without_linux(hdev);
4072 		}
4073 	} else {
4074 		if (hdev->asic_prop.hard_reset_done_by_fw)
4075 			hl_fw_ask_hard_reset_without_linux(hdev);
4076 		else
4077 			hl_fw_ask_halt_machine_without_linux(hdev);
4078 	}
4079 
4080 	if (driver_performs_reset) {
4081 
4082 		/* Configure the reset registers. Must be done as early as
4083 		 * possible in case we fail during H/W initialization
4084 		 */
4085 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4086 						(CFG_RST_H_DMA_MASK |
4087 						CFG_RST_H_MME_MASK |
4088 						CFG_RST_H_SM_MASK |
4089 						CFG_RST_H_TPC_7_MASK));
4090 
4091 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4092 
4093 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4094 						(CFG_RST_H_HBM_MASK |
4095 						CFG_RST_H_TPC_7_MASK |
4096 						CFG_RST_H_NIC_MASK |
4097 						CFG_RST_H_SM_MASK |
4098 						CFG_RST_H_DMA_MASK |
4099 						CFG_RST_H_MME_MASK |
4100 						CFG_RST_H_CPU_MASK |
4101 						CFG_RST_H_MMU_MASK));
4102 
4103 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4104 						(CFG_RST_L_IF_MASK |
4105 						CFG_RST_L_PSOC_MASK |
4106 						CFG_RST_L_TPC_MASK));
4107 
4108 		msleep(cpu_timeout_ms);
4109 
4110 		/* Tell ASIC not to re-initialize PCIe */
4111 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4112 
4113 		/* Restart BTL/BLR upon hard-reset */
4114 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4115 
4116 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4117 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4118 
4119 		dev_dbg(hdev->dev,
4120 			"Issued HARD reset command, going to wait %dms\n",
4121 			reset_timeout_ms);
4122 	} else {
4123 		dev_dbg(hdev->dev,
4124 			"Firmware performs HARD reset, going to wait %dms\n",
4125 			reset_timeout_ms);
4126 	}
4127 
4128 skip_reset:
4129 	/*
4130 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4131 	 * itself is in reset. Need to wait until the reset is deasserted
4132 	 */
4133 	msleep(reset_timeout_ms);
4134 
4135 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4136 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4137 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4138 		return -ETIMEDOUT;
4139 	}
4140 
4141 	if (gaudi) {
4142 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4143 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4144 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4145 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4146 						HW_CAP_HBM_SCRAMBLER);
4147 
4148 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4149 
4150 		hdev->device_cpu_is_halted = false;
4151 	}
4152 	return 0;
4153 }
4154 
4155 static int gaudi_suspend(struct hl_device *hdev)
4156 {
4157 	int rc;
4158 
4159 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4160 	if (rc)
4161 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4162 
4163 	return rc;
4164 }
4165 
/*
 * gaudi_resume() - resume the device by initializing the iATU again.
 * @hdev: habanalabs device structure.
 *
 * Return: the value returned by gaudi_init_iatu().
 */
static int gaudi_resume(struct hl_device *hdev)
{
	int rc = gaudi_init_iatu(hdev);

	return rc;
}
4170 
4171 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4172 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4173 {
4174 	int rc;
4175 
4176 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4177 			VM_DONTCOPY | VM_NORESERVE);
4178 
4179 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4180 				(dma_addr - HOST_PHYS_BASE), size);
4181 	if (rc)
4182 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4183 
4184 	return rc;
4185 }
4186 
4187 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4188 {
4189 	struct cpu_dyn_regs *dyn_regs =
4190 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4191 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4192 	struct gaudi_device *gaudi = hdev->asic_specific;
4193 	bool invalid_queue = false;
4194 	int dma_id;
4195 
4196 	switch (hw_queue_id) {
4197 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4198 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4199 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4200 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4201 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4202 		break;
4203 
4204 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4205 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4206 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4207 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4208 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4209 		break;
4210 
4211 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4212 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4213 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4214 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4215 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4216 		break;
4217 
4218 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4219 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4220 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4221 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4222 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4223 		break;
4224 
4225 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4226 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4227 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4228 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4229 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4230 		break;
4231 
4232 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4233 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4234 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4235 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4236 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4237 		break;
4238 
4239 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4240 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4241 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4242 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4243 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4244 		break;
4245 
4246 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4247 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4248 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4249 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4250 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4251 		break;
4252 
4253 	case GAUDI_QUEUE_ID_CPU_PQ:
4254 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4255 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4256 		else
4257 			invalid_queue = true;
4258 		break;
4259 
4260 	case GAUDI_QUEUE_ID_MME_0_0:
4261 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4262 		break;
4263 
4264 	case GAUDI_QUEUE_ID_MME_0_1:
4265 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4266 		break;
4267 
4268 	case GAUDI_QUEUE_ID_MME_0_2:
4269 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4270 		break;
4271 
4272 	case GAUDI_QUEUE_ID_MME_0_3:
4273 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4274 		break;
4275 
4276 	case GAUDI_QUEUE_ID_MME_1_0:
4277 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4278 		break;
4279 
4280 	case GAUDI_QUEUE_ID_MME_1_1:
4281 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4282 		break;
4283 
4284 	case GAUDI_QUEUE_ID_MME_1_2:
4285 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4286 		break;
4287 
4288 	case GAUDI_QUEUE_ID_MME_1_3:
4289 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4290 		break;
4291 
4292 	case GAUDI_QUEUE_ID_TPC_0_0:
4293 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4294 		break;
4295 
4296 	case GAUDI_QUEUE_ID_TPC_0_1:
4297 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4298 		break;
4299 
4300 	case GAUDI_QUEUE_ID_TPC_0_2:
4301 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4302 		break;
4303 
4304 	case GAUDI_QUEUE_ID_TPC_0_3:
4305 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4306 		break;
4307 
4308 	case GAUDI_QUEUE_ID_TPC_1_0:
4309 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4310 		break;
4311 
4312 	case GAUDI_QUEUE_ID_TPC_1_1:
4313 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4314 		break;
4315 
4316 	case GAUDI_QUEUE_ID_TPC_1_2:
4317 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4318 		break;
4319 
4320 	case GAUDI_QUEUE_ID_TPC_1_3:
4321 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4322 		break;
4323 
4324 	case GAUDI_QUEUE_ID_TPC_2_0:
4325 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4326 		break;
4327 
4328 	case GAUDI_QUEUE_ID_TPC_2_1:
4329 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4330 		break;
4331 
4332 	case GAUDI_QUEUE_ID_TPC_2_2:
4333 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4334 		break;
4335 
4336 	case GAUDI_QUEUE_ID_TPC_2_3:
4337 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4338 		break;
4339 
4340 	case GAUDI_QUEUE_ID_TPC_3_0:
4341 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4342 		break;
4343 
4344 	case GAUDI_QUEUE_ID_TPC_3_1:
4345 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4346 		break;
4347 
4348 	case GAUDI_QUEUE_ID_TPC_3_2:
4349 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4350 		break;
4351 
4352 	case GAUDI_QUEUE_ID_TPC_3_3:
4353 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4354 		break;
4355 
4356 	case GAUDI_QUEUE_ID_TPC_4_0:
4357 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4358 		break;
4359 
4360 	case GAUDI_QUEUE_ID_TPC_4_1:
4361 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4362 		break;
4363 
4364 	case GAUDI_QUEUE_ID_TPC_4_2:
4365 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4366 		break;
4367 
4368 	case GAUDI_QUEUE_ID_TPC_4_3:
4369 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4370 		break;
4371 
4372 	case GAUDI_QUEUE_ID_TPC_5_0:
4373 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4374 		break;
4375 
4376 	case GAUDI_QUEUE_ID_TPC_5_1:
4377 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4378 		break;
4379 
4380 	case GAUDI_QUEUE_ID_TPC_5_2:
4381 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4382 		break;
4383 
4384 	case GAUDI_QUEUE_ID_TPC_5_3:
4385 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4386 		break;
4387 
4388 	case GAUDI_QUEUE_ID_TPC_6_0:
4389 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4390 		break;
4391 
4392 	case GAUDI_QUEUE_ID_TPC_6_1:
4393 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4394 		break;
4395 
4396 	case GAUDI_QUEUE_ID_TPC_6_2:
4397 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4398 		break;
4399 
4400 	case GAUDI_QUEUE_ID_TPC_6_3:
4401 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4402 		break;
4403 
4404 	case GAUDI_QUEUE_ID_TPC_7_0:
4405 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4406 		break;
4407 
4408 	case GAUDI_QUEUE_ID_TPC_7_1:
4409 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4410 		break;
4411 
4412 	case GAUDI_QUEUE_ID_TPC_7_2:
4413 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4414 		break;
4415 
4416 	case GAUDI_QUEUE_ID_TPC_7_3:
4417 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4418 		break;
4419 
4420 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4421 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4422 			invalid_queue = true;
4423 
4424 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4425 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4426 		break;
4427 
4428 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4429 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4430 			invalid_queue = true;
4431 
4432 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4433 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4434 		break;
4435 
4436 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4437 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4438 			invalid_queue = true;
4439 
4440 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4441 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4442 		break;
4443 
4444 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4445 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4446 			invalid_queue = true;
4447 
4448 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4449 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4450 		break;
4451 
4452 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4453 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4454 			invalid_queue = true;
4455 
4456 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4457 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4458 		break;
4459 
4460 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4461 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4462 			invalid_queue = true;
4463 
4464 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4465 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4466 		break;
4467 
4468 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4469 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4470 			invalid_queue = true;
4471 
4472 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4473 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4474 		break;
4475 
4476 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4477 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4478 			invalid_queue = true;
4479 
4480 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4481 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4482 		break;
4483 
4484 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4485 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4486 			invalid_queue = true;
4487 
4488 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4489 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4490 		break;
4491 
4492 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4493 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4494 			invalid_queue = true;
4495 
4496 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4497 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4498 		break;
4499 
4500 	default:
4501 		invalid_queue = true;
4502 	}
4503 
4504 	if (invalid_queue) {
4505 		/* Should never get here */
4506 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4507 			hw_queue_id);
4508 		return;
4509 	}
4510 
4511 	db_value = pi;
4512 
4513 	/* ring the doorbell */
4514 	WREG32(db_reg_offset, db_value);
4515 
4516 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4517 		/* make sure device CPU will read latest data from host */
4518 		mb();
4519 
4520 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4521 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4522 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4523 
4524 		WREG32(irq_handler_offset,
4525 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4526 	}
4527 }
4528 
4529 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4530 				struct hl_bd *bd)
4531 {
4532 	__le64 *pbd = (__le64 *) bd;
4533 
4534 	/* The QMANs are on the host memory so a simple copy suffice */
4535 	pqe[0] = pbd[0];
4536 	pqe[1] = pbd[1];
4537 }
4538 
4539 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4540 					dma_addr_t *dma_handle, gfp_t flags)
4541 {
4542 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4543 						dma_handle, flags);
4544 
4545 	/* Shift to the device's base physical address of host memory */
4546 	if (kernel_addr)
4547 		*dma_handle += HOST_PHYS_BASE;
4548 
4549 	return kernel_addr;
4550 }
4551 
4552 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4553 		void *cpu_addr, dma_addr_t dma_handle)
4554 {
4555 	/* Cancel the device's base physical address of host memory */
4556 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4557 
4558 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4559 }
4560 
4561 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4562 {
4563 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4564 	u64 cur_addr = prop->dram_user_base_address;
4565 	u32 chunk_size, busy;
4566 	int rc, dma_id;
4567 
4568 	while (cur_addr < prop->dram_end_address) {
4569 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4570 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4571 
4572 			chunk_size =
4573 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4574 
4575 			dev_dbg(hdev->dev,
4576 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4577 				cur_addr, cur_addr + chunk_size);
4578 
4579 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4580 					lower_32_bits(val));
4581 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4582 					upper_32_bits(val));
4583 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4584 						lower_32_bits(cur_addr));
4585 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4586 						upper_32_bits(cur_addr));
4587 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4588 					chunk_size);
4589 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4590 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4591 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4592 
4593 			cur_addr += chunk_size;
4594 
4595 			if (cur_addr == prop->dram_end_address)
4596 				break;
4597 		}
4598 
4599 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4600 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4601 
4602 			rc = hl_poll_timeout(
4603 				hdev,
4604 				mmDMA0_CORE_STS0 + dma_offset,
4605 				busy,
4606 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4607 				1000,
4608 				HBM_SCRUBBING_TIMEOUT_US);
4609 
4610 			if (rc) {
4611 				dev_err(hdev->dev,
4612 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4613 					dma_id);
4614 				return -EIO;
4615 			}
4616 		}
4617 	}
4618 
4619 	return 0;
4620 }
4621 
4622 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4623 {
4624 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4625 	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4626 	u64 addr, size, val = hdev->memory_scrub_val;
4627 	ktime_t timeout;
4628 	int rc = 0;
4629 
4630 	if (!hdev->memory_scrub)
4631 		return 0;
4632 
4633 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4634 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4635 		if (ktime_compare(ktime_get(), timeout) > 0) {
4636 			dev_err(hdev->dev, "waiting for idle timeout\n");
4637 			return -ETIMEDOUT;
4638 		}
4639 		usleep_range((1000 >> 2) + 1, 1000);
4640 	}
4641 
4642 	/* Scrub SRAM */
4643 	addr = prop->sram_user_base_address;
4644 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4645 
4646 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4647 			addr, addr + size, val);
4648 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4649 	if (rc) {
4650 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4651 		return rc;
4652 	}
4653 
4654 	/* Scrub HBM using all DMA channels in parallel */
4655 	rc = gaudi_scrub_device_dram(hdev, val);
4656 	if (rc) {
4657 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4658 		return rc;
4659 	}
4660 
4661 	return 0;
4662 }
4663 
4664 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4665 				u32 queue_id, dma_addr_t *dma_handle,
4666 				u16 *queue_len)
4667 {
4668 	struct gaudi_device *gaudi = hdev->asic_specific;
4669 	struct gaudi_internal_qman_info *q;
4670 
4671 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4672 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4673 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4674 		return NULL;
4675 	}
4676 
4677 	q = &gaudi->internal_qmans[queue_id];
4678 	*dma_handle = q->pq_dma_addr;
4679 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4680 
4681 	return q->pq_kernel_addr;
4682 }
4683 
4684 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4685 				u16 len, u32 timeout, u64 *result)
4686 {
4687 	struct gaudi_device *gaudi = hdev->asic_specific;
4688 
4689 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4690 		if (result)
4691 			*result = 0;
4692 		return 0;
4693 	}
4694 
4695 	if (!timeout)
4696 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4697 
4698 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4699 						timeout, result);
4700 }
4701 
4702 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4703 {
4704 	struct packet_msg_prot *fence_pkt;
4705 	dma_addr_t pkt_dma_addr;
4706 	u32 fence_val, tmp, timeout_usec;
4707 	dma_addr_t fence_dma_addr;
4708 	u32 *fence_ptr;
4709 	int rc;
4710 
4711 	if (hdev->pldm)
4712 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4713 	else
4714 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4715 
4716 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4717 
4718 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4719 	if (!fence_ptr) {
4720 		dev_err(hdev->dev,
4721 			"Failed to allocate memory for H/W queue %d testing\n",
4722 			hw_queue_id);
4723 		return -ENOMEM;
4724 	}
4725 
4726 	*fence_ptr = 0;
4727 
4728 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4729 						&pkt_dma_addr);
4730 	if (!fence_pkt) {
4731 		dev_err(hdev->dev,
4732 			"Failed to allocate packet for H/W queue %d testing\n",
4733 			hw_queue_id);
4734 		rc = -ENOMEM;
4735 		goto free_fence_ptr;
4736 	}
4737 
4738 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4739 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4740 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4741 
4742 	fence_pkt->ctl = cpu_to_le32(tmp);
4743 	fence_pkt->value = cpu_to_le32(fence_val);
4744 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4745 
4746 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4747 					sizeof(struct packet_msg_prot),
4748 					pkt_dma_addr);
4749 	if (rc) {
4750 		dev_err(hdev->dev,
4751 			"Failed to send fence packet to H/W queue %d\n",
4752 			hw_queue_id);
4753 		goto free_pkt;
4754 	}
4755 
4756 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4757 					1000, timeout_usec, true);
4758 
4759 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4760 
4761 	if (rc == -ETIMEDOUT) {
4762 		dev_err(hdev->dev,
4763 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4764 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4765 		rc = -EIO;
4766 	}
4767 
4768 free_pkt:
4769 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4770 free_fence_ptr:
4771 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4772 	return rc;
4773 }
4774 
4775 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4776 {
4777 	struct gaudi_device *gaudi = hdev->asic_specific;
4778 
4779 	/*
4780 	 * check capability here as send_cpu_message() won't update the result
4781 	 * value if no capability
4782 	 */
4783 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4784 		return 0;
4785 
4786 	return hl_fw_test_cpu_queue(hdev);
4787 }
4788 
4789 static int gaudi_test_queues(struct hl_device *hdev)
4790 {
4791 	int i, rc, ret_val = 0;
4792 
4793 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4794 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4795 			rc = gaudi_test_queue(hdev, i);
4796 			if (rc)
4797 				ret_val = -EINVAL;
4798 		}
4799 	}
4800 
4801 	rc = gaudi_test_cpu_queue(hdev);
4802 	if (rc)
4803 		ret_val = -EINVAL;
4804 
4805 	return ret_val;
4806 }
4807 
4808 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4809 		gfp_t mem_flags, dma_addr_t *dma_handle)
4810 {
4811 	void *kernel_addr;
4812 
4813 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4814 		return NULL;
4815 
4816 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4817 
4818 	/* Shift to the device's base physical address of host memory */
4819 	if (kernel_addr)
4820 		*dma_handle += HOST_PHYS_BASE;
4821 
4822 	return kernel_addr;
4823 }
4824 
4825 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4826 			dma_addr_t dma_addr)
4827 {
4828 	/* Cancel the device's base physical address of host memory */
4829 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4830 
4831 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4832 }
4833 
4834 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4835 					size_t size, dma_addr_t *dma_handle)
4836 {
4837 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4838 }
4839 
4840 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4841 						size_t size, void *vaddr)
4842 {
4843 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4844 }
4845 
4846 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4847 {
4848 	struct scatterlist *sg, *sg_next_iter;
4849 	u32 count, dma_desc_cnt;
4850 	u64 len, len_next;
4851 	dma_addr_t addr, addr_next;
4852 
4853 	dma_desc_cnt = 0;
4854 
4855 	for_each_sgtable_dma_sg(sgt, sg, count) {
4856 		len = sg_dma_len(sg);
4857 		addr = sg_dma_address(sg);
4858 
4859 		if (len == 0)
4860 			break;
4861 
4862 		while ((count + 1) < sgt->nents) {
4863 			sg_next_iter = sg_next(sg);
4864 			len_next = sg_dma_len(sg_next_iter);
4865 			addr_next = sg_dma_address(sg_next_iter);
4866 
4867 			if (len_next == 0)
4868 				break;
4869 
4870 			if ((addr + len == addr_next) &&
4871 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4872 				len += len_next;
4873 				count++;
4874 				sg = sg_next_iter;
4875 			} else {
4876 				break;
4877 			}
4878 		}
4879 
4880 		dma_desc_cnt++;
4881 	}
4882 
4883 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4884 }
4885 
4886 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4887 				struct hl_cs_parser *parser,
4888 				struct packet_lin_dma *user_dma_pkt,
4889 				u64 addr, enum dma_data_direction dir)
4890 {
4891 	struct hl_userptr *userptr;
4892 	int rc;
4893 
4894 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4895 			parser->job_userptr_list, &userptr))
4896 		goto already_pinned;
4897 
4898 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4899 	if (!userptr)
4900 		return -ENOMEM;
4901 
4902 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4903 				userptr);
4904 	if (rc)
4905 		goto free_userptr;
4906 
4907 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4908 
4909 	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4910 	if (rc) {
4911 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4912 		goto unpin_memory;
4913 	}
4914 
4915 	userptr->dma_mapped = true;
4916 	userptr->dir = dir;
4917 
4918 already_pinned:
4919 	parser->patched_cb_size +=
4920 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4921 
4922 	return 0;
4923 
4924 unpin_memory:
4925 	list_del(&userptr->job_node);
4926 	hl_unpin_host_memory(hdev, userptr);
4927 free_userptr:
4928 	kfree(userptr);
4929 	return rc;
4930 }
4931 
4932 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4933 				struct hl_cs_parser *parser,
4934 				struct packet_lin_dma *user_dma_pkt,
4935 				bool src_in_host)
4936 {
4937 	enum dma_data_direction dir;
4938 	bool skip_host_mem_pin = false, user_memset;
4939 	u64 addr;
4940 	int rc = 0;
4941 
4942 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4943 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4944 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4945 
4946 	if (src_in_host) {
4947 		if (user_memset)
4948 			skip_host_mem_pin = true;
4949 
4950 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4951 		dir = DMA_TO_DEVICE;
4952 		addr = le64_to_cpu(user_dma_pkt->src_addr);
4953 	} else {
4954 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4955 		dir = DMA_FROM_DEVICE;
4956 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4957 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4958 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4959 	}
4960 
4961 	if (skip_host_mem_pin)
4962 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4963 	else
4964 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4965 						addr, dir);
4966 
4967 	return rc;
4968 }
4969 
4970 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4971 				struct hl_cs_parser *parser,
4972 				struct packet_lin_dma *user_dma_pkt)
4973 {
4974 	bool src_in_host = false;
4975 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4976 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4977 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4978 
4979 	dev_dbg(hdev->dev, "DMA packet details:\n");
4980 	dev_dbg(hdev->dev, "source == 0x%llx\n",
4981 				le64_to_cpu(user_dma_pkt->src_addr));
4982 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4983 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4984 
4985 	/*
4986 	 * Special handling for DMA with size 0. Bypass all validations
4987 	 * because no transactions will be done except for WR_COMP, which
4988 	 * is not a security issue
4989 	 */
4990 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
4991 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4992 		return 0;
4993 	}
4994 
4995 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4996 		src_in_host = true;
4997 
4998 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4999 						src_in_host);
5000 }
5001 
5002 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5003 					struct hl_cs_parser *parser,
5004 					struct packet_load_and_exe *user_pkt)
5005 {
5006 	u32 cfg;
5007 
5008 	cfg = le32_to_cpu(user_pkt->cfg);
5009 
5010 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5011 		dev_err(hdev->dev,
5012 			"User not allowed to use Load and Execute\n");
5013 		return -EPERM;
5014 	}
5015 
5016 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5017 
5018 	return 0;
5019 }
5020 
5021 static int gaudi_validate_cb(struct hl_device *hdev,
5022 			struct hl_cs_parser *parser, bool is_mmu)
5023 {
5024 	u32 cb_parsed_length = 0;
5025 	int rc = 0;
5026 
5027 	parser->patched_cb_size = 0;
5028 
5029 	/* cb_user_size is more than 0 so loop will always be executed */
5030 	while (cb_parsed_length < parser->user_cb_size) {
5031 		enum packet_id pkt_id;
5032 		u16 pkt_size;
5033 		struct gaudi_packet *user_pkt;
5034 
5035 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5036 
5037 		pkt_id = (enum packet_id) (
5038 				(le64_to_cpu(user_pkt->header) &
5039 				PACKET_HEADER_PACKET_ID_MASK) >>
5040 					PACKET_HEADER_PACKET_ID_SHIFT);
5041 
5042 		if (!validate_packet_id(pkt_id)) {
5043 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5044 			rc = -EINVAL;
5045 			break;
5046 		}
5047 
5048 		pkt_size = gaudi_packet_sizes[pkt_id];
5049 		cb_parsed_length += pkt_size;
5050 		if (cb_parsed_length > parser->user_cb_size) {
5051 			dev_err(hdev->dev,
5052 				"packet 0x%x is out of CB boundary\n", pkt_id);
5053 			rc = -EINVAL;
5054 			break;
5055 		}
5056 
5057 		switch (pkt_id) {
5058 		case PACKET_MSG_PROT:
5059 			dev_err(hdev->dev,
5060 				"User not allowed to use MSG_PROT\n");
5061 			rc = -EPERM;
5062 			break;
5063 
5064 		case PACKET_CP_DMA:
5065 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5066 			rc = -EPERM;
5067 			break;
5068 
5069 		case PACKET_STOP:
5070 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5071 			rc = -EPERM;
5072 			break;
5073 
5074 		case PACKET_WREG_BULK:
5075 			dev_err(hdev->dev,
5076 				"User not allowed to use WREG_BULK\n");
5077 			rc = -EPERM;
5078 			break;
5079 
5080 		case PACKET_LOAD_AND_EXE:
5081 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5082 				(struct packet_load_and_exe *) user_pkt);
5083 			break;
5084 
5085 		case PACKET_LIN_DMA:
5086 			parser->contains_dma_pkt = true;
5087 			if (is_mmu)
5088 				parser->patched_cb_size += pkt_size;
5089 			else
5090 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5091 					(struct packet_lin_dma *) user_pkt);
5092 			break;
5093 
5094 		case PACKET_WREG_32:
5095 		case PACKET_MSG_LONG:
5096 		case PACKET_MSG_SHORT:
5097 		case PACKET_REPEAT:
5098 		case PACKET_FENCE:
5099 		case PACKET_NOP:
5100 		case PACKET_ARB_POINT:
5101 			parser->patched_cb_size += pkt_size;
5102 			break;
5103 
5104 		default:
5105 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5106 				pkt_id);
5107 			rc = -EINVAL;
5108 			break;
5109 		}
5110 
5111 		if (rc)
5112 			break;
5113 	}
5114 
5115 	/*
5116 	 * The new CB should have space at the end for two MSG_PROT packets:
5117 	 * 1. Optional NOP padding for cacheline alignment
5118 	 * 2. A packet that will act as a completion packet
5119 	 * 3. A packet that will generate MSI interrupt
5120 	 */
5121 	if (parser->completion)
5122 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5123 			parser->patched_cb_size);
5124 
5125 	return rc;
5126 }
5127 
5128 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5129 				struct hl_cs_parser *parser,
5130 				struct packet_lin_dma *user_dma_pkt,
5131 				struct packet_lin_dma *new_dma_pkt,
5132 				u32 *new_dma_pkt_size)
5133 {
5134 	struct hl_userptr *userptr;
5135 	struct scatterlist *sg, *sg_next_iter;
5136 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5137 	u64 len, len_next;
5138 	dma_addr_t dma_addr, dma_addr_next;
5139 	u64 device_memory_addr, addr;
5140 	enum dma_data_direction dir;
5141 	struct sg_table *sgt;
5142 	bool src_in_host = false;
5143 	bool skip_host_mem_pin = false;
5144 	bool user_memset;
5145 
5146 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5147 
5148 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5149 		src_in_host = true;
5150 
5151 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5152 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5153 
5154 	if (src_in_host) {
5155 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5156 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5157 		dir = DMA_TO_DEVICE;
5158 		if (user_memset)
5159 			skip_host_mem_pin = true;
5160 	} else {
5161 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5162 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5163 		dir = DMA_FROM_DEVICE;
5164 	}
5165 
5166 	if ((!skip_host_mem_pin) &&
5167 		(!hl_userptr_is_pinned(hdev, addr,
5168 					le32_to_cpu(user_dma_pkt->tsize),
5169 					parser->job_userptr_list, &userptr))) {
5170 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5171 				addr, user_dma_pkt->tsize);
5172 		return -EFAULT;
5173 	}
5174 
5175 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5176 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5177 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5178 		return 0;
5179 	}
5180 
5181 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5182 
5183 	sgt = userptr->sgt;
5184 	dma_desc_cnt = 0;
5185 
5186 	for_each_sgtable_dma_sg(sgt, sg, count) {
5187 		len = sg_dma_len(sg);
5188 		dma_addr = sg_dma_address(sg);
5189 
5190 		if (len == 0)
5191 			break;
5192 
5193 		while ((count + 1) < sgt->nents) {
5194 			sg_next_iter = sg_next(sg);
5195 			len_next = sg_dma_len(sg_next_iter);
5196 			dma_addr_next = sg_dma_address(sg_next_iter);
5197 
5198 			if (len_next == 0)
5199 				break;
5200 
5201 			if ((dma_addr + len == dma_addr_next) &&
5202 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5203 				len += len_next;
5204 				count++;
5205 				sg = sg_next_iter;
5206 			} else {
5207 				break;
5208 			}
5209 		}
5210 
5211 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5212 		if (likely(dma_desc_cnt))
5213 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5214 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5215 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5216 		new_dma_pkt->tsize = cpu_to_le32(len);
5217 
5218 		if (dir == DMA_TO_DEVICE) {
5219 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5220 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5221 		} else {
5222 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5223 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5224 		}
5225 
5226 		if (!user_memset)
5227 			device_memory_addr += len;
5228 		dma_desc_cnt++;
5229 		new_dma_pkt++;
5230 	}
5231 
5232 	if (!dma_desc_cnt) {
5233 		dev_err(hdev->dev,
5234 			"Error of 0 SG entries when patching DMA packet\n");
5235 		return -EFAULT;
5236 	}
5237 
5238 	/* Fix the last dma packet - wrcomp must be as user set it */
5239 	new_dma_pkt--;
5240 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5241 
5242 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5243 
5244 	return 0;
5245 }
5246 
5247 static int gaudi_patch_cb(struct hl_device *hdev,
5248 				struct hl_cs_parser *parser)
5249 {
5250 	u32 cb_parsed_length = 0;
5251 	u32 cb_patched_cur_length = 0;
5252 	int rc = 0;
5253 
5254 	/* cb_user_size is more than 0 so loop will always be executed */
5255 	while (cb_parsed_length < parser->user_cb_size) {
5256 		enum packet_id pkt_id;
5257 		u16 pkt_size;
5258 		u32 new_pkt_size = 0;
5259 		struct gaudi_packet *user_pkt, *kernel_pkt;
5260 
5261 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5262 		kernel_pkt = parser->patched_cb->kernel_address +
5263 					cb_patched_cur_length;
5264 
5265 		pkt_id = (enum packet_id) (
5266 				(le64_to_cpu(user_pkt->header) &
5267 				PACKET_HEADER_PACKET_ID_MASK) >>
5268 					PACKET_HEADER_PACKET_ID_SHIFT);
5269 
5270 		if (!validate_packet_id(pkt_id)) {
5271 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5272 			rc = -EINVAL;
5273 			break;
5274 		}
5275 
5276 		pkt_size = gaudi_packet_sizes[pkt_id];
5277 		cb_parsed_length += pkt_size;
5278 		if (cb_parsed_length > parser->user_cb_size) {
5279 			dev_err(hdev->dev,
5280 				"packet 0x%x is out of CB boundary\n", pkt_id);
5281 			rc = -EINVAL;
5282 			break;
5283 		}
5284 
5285 		switch (pkt_id) {
5286 		case PACKET_LIN_DMA:
5287 			rc = gaudi_patch_dma_packet(hdev, parser,
5288 					(struct packet_lin_dma *) user_pkt,
5289 					(struct packet_lin_dma *) kernel_pkt,
5290 					&new_pkt_size);
5291 			cb_patched_cur_length += new_pkt_size;
5292 			break;
5293 
5294 		case PACKET_MSG_PROT:
5295 			dev_err(hdev->dev,
5296 				"User not allowed to use MSG_PROT\n");
5297 			rc = -EPERM;
5298 			break;
5299 
5300 		case PACKET_CP_DMA:
5301 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5302 			rc = -EPERM;
5303 			break;
5304 
5305 		case PACKET_STOP:
5306 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5307 			rc = -EPERM;
5308 			break;
5309 
5310 		case PACKET_WREG_32:
5311 		case PACKET_WREG_BULK:
5312 		case PACKET_MSG_LONG:
5313 		case PACKET_MSG_SHORT:
5314 		case PACKET_REPEAT:
5315 		case PACKET_FENCE:
5316 		case PACKET_NOP:
5317 		case PACKET_ARB_POINT:
5318 		case PACKET_LOAD_AND_EXE:
5319 			memcpy(kernel_pkt, user_pkt, pkt_size);
5320 			cb_patched_cur_length += pkt_size;
5321 			break;
5322 
5323 		default:
5324 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5325 				pkt_id);
5326 			rc = -EINVAL;
5327 			break;
5328 		}
5329 
5330 		if (rc)
5331 			break;
5332 	}
5333 
5334 	return rc;
5335 }
5336 
5337 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5338 		struct hl_cs_parser *parser)
5339 {
5340 	u64 handle;
5341 	u32 patched_cb_size;
5342 	struct hl_cb *user_cb;
5343 	int rc;
5344 
5345 	/*
5346 	 * The new CB should have space at the end for two MSG_PROT packets:
5347 	 * 1. Optional NOP padding for cacheline alignment
5348 	 * 2. A packet that will act as a completion packet
5349 	 * 3. A packet that will generate MSI interrupt
5350 	 */
5351 	if (parser->completion)
5352 		parser->patched_cb_size = parser->user_cb_size +
5353 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5354 	else
5355 		parser->patched_cb_size = parser->user_cb_size;
5356 
5357 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5358 				parser->patched_cb_size, false, false,
5359 				&handle);
5360 
5361 	if (rc) {
5362 		dev_err(hdev->dev,
5363 			"Failed to allocate patched CB for DMA CS %d\n",
5364 			rc);
5365 		return rc;
5366 	}
5367 
5368 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5369 	/* hl_cb_get should never fail */
5370 	if (!parser->patched_cb) {
5371 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5372 		rc = -EFAULT;
5373 		goto out;
5374 	}
5375 
5376 	/*
5377 	 * We are protected from overflow because the check
5378 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5379 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5380 	 *
5381 	 * There is no option to reach here without going through that check because:
5382 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5383 	 *    an external queue.
5384 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5385 	 */
5386 	memcpy(parser->patched_cb->kernel_address,
5387 		parser->user_cb->kernel_address,
5388 		parser->user_cb_size);
5389 
5390 	patched_cb_size = parser->patched_cb_size;
5391 
5392 	/* Validate patched CB instead of user CB */
5393 	user_cb = parser->user_cb;
5394 	parser->user_cb = parser->patched_cb;
5395 	rc = gaudi_validate_cb(hdev, parser, true);
5396 	parser->user_cb = user_cb;
5397 
5398 	if (rc) {
5399 		hl_cb_put(parser->patched_cb);
5400 		goto out;
5401 	}
5402 
5403 	if (patched_cb_size != parser->patched_cb_size) {
5404 		dev_err(hdev->dev, "user CB size mismatch\n");
5405 		hl_cb_put(parser->patched_cb);
5406 		rc = -EINVAL;
5407 		goto out;
5408 	}
5409 
5410 out:
5411 	/*
5412 	 * Always call cb destroy here because we still have 1 reference
5413 	 * to it by calling cb_get earlier. After the job will be completed,
5414 	 * cb_put will release it, but here we want to remove it from the
5415 	 * idr
5416 	 */
5417 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5418 
5419 	return rc;
5420 }
5421 
5422 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5423 		struct hl_cs_parser *parser)
5424 {
5425 	u64 handle;
5426 	int rc;
5427 
5428 	rc = gaudi_validate_cb(hdev, parser, false);
5429 
5430 	if (rc)
5431 		goto free_userptr;
5432 
5433 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5434 				parser->patched_cb_size, false, false,
5435 				&handle);
5436 	if (rc) {
5437 		dev_err(hdev->dev,
5438 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5439 		goto free_userptr;
5440 	}
5441 
5442 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5443 	/* hl_cb_get should never fail here */
5444 	if (!parser->patched_cb) {
5445 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5446 		rc = -EFAULT;
5447 		goto out;
5448 	}
5449 
5450 	rc = gaudi_patch_cb(hdev, parser);
5451 
5452 	if (rc)
5453 		hl_cb_put(parser->patched_cb);
5454 
5455 out:
5456 	/*
5457 	 * Always call cb destroy here because we still have 1 reference
5458 	 * to it by calling cb_get earlier. After the job will be completed,
5459 	 * cb_put will release it, but here we want to remove it from the
5460 	 * idr
5461 	 */
5462 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5463 
5464 free_userptr:
5465 	if (rc)
5466 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5467 	return rc;
5468 }
5469 
5470 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5471 					struct hl_cs_parser *parser)
5472 {
5473 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5474 	struct gaudi_device *gaudi = hdev->asic_specific;
5475 	u32 nic_queue_offset, nic_mask_q_id;
5476 
5477 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5478 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5479 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5480 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5481 
5482 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5483 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5484 			return -EINVAL;
5485 		}
5486 	}
5487 
5488 	/* For internal queue jobs just check if CB address is valid */
5489 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5490 					parser->user_cb_size,
5491 					asic_prop->sram_user_base_address,
5492 					asic_prop->sram_end_address))
5493 		return 0;
5494 
5495 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5496 					parser->user_cb_size,
5497 					asic_prop->dram_user_base_address,
5498 					asic_prop->dram_end_address))
5499 		return 0;
5500 
5501 	/* PMMU and HPMMU addresses are equal, check only one of them */
5502 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5503 					parser->user_cb_size,
5504 					asic_prop->pmmu.start_addr,
5505 					asic_prop->pmmu.end_addr))
5506 		return 0;
5507 
5508 	dev_err(hdev->dev,
5509 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5510 		parser->user_cb, parser->user_cb_size);
5511 
5512 	return -EFAULT;
5513 }
5514 
5515 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5516 {
5517 	struct gaudi_device *gaudi = hdev->asic_specific;
5518 
5519 	if (parser->queue_type == QUEUE_TYPE_INT)
5520 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5521 
5522 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5523 		return gaudi_parse_cb_mmu(hdev, parser);
5524 	else
5525 		return gaudi_parse_cb_no_mmu(hdev, parser);
5526 }
5527 
5528 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5529 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5530 				u32 msi_vec, bool eb)
5531 {
5532 	struct packet_msg_prot *cq_pkt;
5533 	struct packet_nop *cq_padding;
5534 	u64 msi_addr;
5535 	u32 tmp;
5536 
5537 	cq_padding = kernel_address + original_len;
5538 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5539 
5540 	while ((void *)cq_padding < (void *)cq_pkt) {
5541 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5542 		cq_padding++;
5543 	}
5544 
5545 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5546 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5547 
5548 	if (eb)
5549 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5550 
5551 	cq_pkt->ctl = cpu_to_le32(tmp);
5552 	cq_pkt->value = cpu_to_le32(cq_val);
5553 	cq_pkt->addr = cpu_to_le64(cq_addr);
5554 
5555 	cq_pkt++;
5556 
5557 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5558 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5559 	cq_pkt->ctl = cpu_to_le32(tmp);
5560 	cq_pkt->value = cpu_to_le32(1);
5561 	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5562 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5563 }
5564 
5565 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5566 {
5567 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5568 }
5569 
5570 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5571 					u32 size, u64 val)
5572 {
5573 	struct packet_lin_dma *lin_dma_pkt;
5574 	struct hl_cs_job *job;
5575 	u32 cb_size, ctl, err_cause;
5576 	struct hl_cb *cb;
5577 	int rc;
5578 
5579 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5580 	if (!cb)
5581 		return -EFAULT;
5582 
5583 	lin_dma_pkt = cb->kernel_address;
5584 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5585 	cb_size = sizeof(*lin_dma_pkt);
5586 
5587 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5588 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5589 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5590 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5591 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5592 
5593 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5594 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5595 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5596 	lin_dma_pkt->tsize = cpu_to_le32(size);
5597 
5598 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5599 	if (!job) {
5600 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5601 		rc = -ENOMEM;
5602 		goto release_cb;
5603 	}
5604 
5605 	/* Verify DMA is OK */
5606 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5607 	if (err_cause && !hdev->init_done) {
5608 		dev_dbg(hdev->dev,
5609 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5610 			err_cause);
5611 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5612 	}
5613 
5614 	job->id = 0;
5615 	job->user_cb = cb;
5616 	atomic_inc(&job->user_cb->cs_cnt);
5617 	job->user_cb_size = cb_size;
5618 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5619 	job->patched_cb = job->user_cb;
5620 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5621 
5622 	hl_debugfs_add_job(hdev, job);
5623 
5624 	rc = gaudi_send_job_on_qman0(hdev, job);
5625 	hl_debugfs_remove_job(hdev, job);
5626 	kfree(job);
5627 	atomic_dec(&cb->cs_cnt);
5628 
5629 	/* Verify DMA is OK */
5630 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5631 	if (err_cause) {
5632 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5633 		rc = -EIO;
5634 		if (!hdev->init_done) {
5635 			dev_dbg(hdev->dev,
5636 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5637 				err_cause);
5638 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5639 		}
5640 	}
5641 
5642 release_cb:
5643 	hl_cb_put(cb);
5644 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5645 
5646 	return rc;
5647 }
5648 
5649 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5650 					u32 num_regs, u32 val)
5651 {
5652 	struct packet_msg_long *pkt;
5653 	struct hl_cs_job *job;
5654 	u32 cb_size, ctl;
5655 	struct hl_cb *cb;
5656 	int i, rc;
5657 
5658 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5659 
5660 	if (cb_size > SZ_2M) {
5661 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5662 		return -ENOMEM;
5663 	}
5664 
5665 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5666 	if (!cb)
5667 		return -EFAULT;
5668 
5669 	pkt = cb->kernel_address;
5670 
5671 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5672 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5673 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5674 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5675 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5676 
5677 	for (i = 0; i < num_regs ; i++, pkt++) {
5678 		pkt->ctl = cpu_to_le32(ctl);
5679 		pkt->value = cpu_to_le32(val);
5680 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5681 	}
5682 
5683 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5684 	if (!job) {
5685 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5686 		rc = -ENOMEM;
5687 		goto release_cb;
5688 	}
5689 
5690 	job->id = 0;
5691 	job->user_cb = cb;
5692 	atomic_inc(&job->user_cb->cs_cnt);
5693 	job->user_cb_size = cb_size;
5694 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5695 	job->patched_cb = job->user_cb;
5696 	job->job_cb_size = cb_size;
5697 
5698 	hl_debugfs_add_job(hdev, job);
5699 
5700 	rc = gaudi_send_job_on_qman0(hdev, job);
5701 	hl_debugfs_remove_job(hdev, job);
5702 	kfree(job);
5703 	atomic_dec(&cb->cs_cnt);
5704 
5705 release_cb:
5706 	hl_cb_put(cb);
5707 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5708 
5709 	return rc;
5710 }
5711 
5712 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5713 {
5714 	u64 base_addr;
5715 	u32 num_regs;
5716 	int rc;
5717 
5718 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5719 	num_regs = NUM_OF_SOB_IN_BLOCK;
5720 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5721 	if (rc) {
5722 		dev_err(hdev->dev, "failed resetting SM registers");
5723 		return -ENOMEM;
5724 	}
5725 
5726 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5727 	num_regs = NUM_OF_SOB_IN_BLOCK;
5728 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5729 	if (rc) {
5730 		dev_err(hdev->dev, "failed resetting SM registers");
5731 		return -ENOMEM;
5732 	}
5733 
5734 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5735 	num_regs = NUM_OF_SOB_IN_BLOCK;
5736 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5737 	if (rc) {
5738 		dev_err(hdev->dev, "failed resetting SM registers");
5739 		return -ENOMEM;
5740 	}
5741 
5742 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5743 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5744 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5745 	if (rc) {
5746 		dev_err(hdev->dev, "failed resetting SM registers");
5747 		return -ENOMEM;
5748 	}
5749 
5750 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5751 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5752 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5753 	if (rc) {
5754 		dev_err(hdev->dev, "failed resetting SM registers");
5755 		return -ENOMEM;
5756 	}
5757 
5758 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5759 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5760 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5761 	if (rc) {
5762 		dev_err(hdev->dev, "failed resetting SM registers");
5763 		return -ENOMEM;
5764 	}
5765 
5766 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5767 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5768 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5769 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5770 	if (rc) {
5771 		dev_err(hdev->dev, "failed resetting SM registers");
5772 		return -ENOMEM;
5773 	}
5774 
5775 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5776 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5777 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5778 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5779 	if (rc) {
5780 		dev_err(hdev->dev, "failed resetting SM registers");
5781 		return -ENOMEM;
5782 	}
5783 
5784 	return 0;
5785 }
5786 
5787 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5788 {
5789 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5790 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5791 	int i;
5792 
5793 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5794 		u64 sob_addr = CFG_BASE +
5795 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5796 				(i * sob_delta);
5797 		u32 dma_offset = i * DMA_CORE_OFFSET;
5798 
5799 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5800 				lower_32_bits(sob_addr));
5801 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5802 				upper_32_bits(sob_addr));
5803 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5804 
5805 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5806 		 * modified by the user for SRAM reduction
5807 		 */
5808 		if (i > 1)
5809 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5810 								0x00000001);
5811 	}
5812 }
5813 
5814 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5815 {
5816 	u32 qman_offset;
5817 	int i;
5818 
5819 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5820 		qman_offset = i * DMA_QMAN_OFFSET;
5821 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5822 	}
5823 
5824 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5825 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5826 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5827 	}
5828 
5829 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5830 		qman_offset = i * TPC_QMAN_OFFSET;
5831 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5832 	}
5833 
5834 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5835 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5836 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5837 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5838 	}
5839 }
5840 
/*
 * gaudi_restore_user_registers() - restore SM, DMA and QM registers.
 * @hdev: habanalabs device structure.
 *
 * The DMA and QM registers are restored only if the sync manager registers
 * were restored successfully.
 *
 * Return: 0 on success, otherwise the error from
 * gaudi_restore_sm_registers().
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
5854 
5855 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5856 {
5857 	return 0;
5858 }
5859 
5860 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5861 {
5862 	u32 size = hdev->asic_prop.mmu_pgt_size +
5863 			hdev->asic_prop.mmu_cache_mng_size;
5864 	struct gaudi_device *gaudi = hdev->asic_specific;
5865 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5866 
5867 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5868 		return 0;
5869 
5870 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5871 }
5872 
/*
 * gaudi_restore_phase_topology() - intentionally empty hook on Gaudi.
 * @hdev: habanalabs device structure (unused).
 */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{
}
5877 
5878 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5879 					u32 size_to_dma, dma_addr_t dma_addr)
5880 {
5881 	u32 err_cause, val;
5882 	u64 dma_offset;
5883 	int rc;
5884 
5885 	dma_offset = dma_id * DMA_CORE_OFFSET;
5886 
5887 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5888 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5889 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5890 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5891 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5892 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5893 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5894 
5895 	rc = hl_poll_timeout(
5896 		hdev,
5897 		mmDMA0_CORE_STS0 + dma_offset,
5898 		val,
5899 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5900 		0,
5901 		1000000);
5902 
5903 	if (rc) {
5904 		dev_err(hdev->dev,
5905 			"DMA %d timed-out during reading of 0x%llx\n",
5906 			dma_id, addr);
5907 		return -EIO;
5908 	}
5909 
5910 	/* Verify DMA is OK */
5911 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5912 	if (err_cause) {
5913 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5914 		dev_dbg(hdev->dev,
5915 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5916 			err_cause);
5917 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5918 
5919 		return -EIO;
5920 	}
5921 
5922 	return 0;
5923 }
5924 
5925 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5926 				void *blob_addr)
5927 {
5928 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5929 	u32 qm_glbl_sts0, qm_cgm_sts;
5930 	u64 dma_offset, qm_offset;
5931 	dma_addr_t dma_addr;
5932 	void *kernel_addr;
5933 	bool is_eng_idle;
5934 	int rc = 0, dma_id;
5935 
5936 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5937 
5938 	if (!kernel_addr)
5939 		return -ENOMEM;
5940 
5941 	hdev->asic_funcs->hw_queues_lock(hdev);
5942 
5943 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5944 	dma_offset = dma_id * DMA_CORE_OFFSET;
5945 	qm_offset = dma_id * DMA_QMAN_OFFSET;
5946 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5947 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5948 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5949 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5950 		      IS_DMA_IDLE(dma_core_sts0);
5951 
5952 	if (!is_eng_idle) {
5953 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5954 		dma_offset = dma_id * DMA_CORE_OFFSET;
5955 		qm_offset = dma_id * DMA_QMAN_OFFSET;
5956 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5957 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5958 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5959 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5960 			      IS_DMA_IDLE(dma_core_sts0);
5961 
5962 		if (!is_eng_idle) {
5963 			dev_err_ratelimited(hdev->dev,
5964 				"Can't read via DMA because it is BUSY\n");
5965 			rc = -EAGAIN;
5966 			goto out;
5967 		}
5968 	}
5969 
5970 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5971 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5972 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5973 
5974 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5975 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5976 	 * ASID
5977 	 */
5978 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5979 
5980 	/* Verify DMA is OK */
5981 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5982 	if (err_cause) {
5983 		dev_dbg(hdev->dev,
5984 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5985 			err_cause);
5986 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5987 	}
5988 
5989 	pos = 0;
5990 	size_left = size;
5991 	size_to_dma = SZ_2M;
5992 
5993 	while (size_left > 0) {
5994 
5995 		if (size_left < SZ_2M)
5996 			size_to_dma = size_left;
5997 
5998 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5999 						dma_addr);
6000 		if (rc)
6001 			break;
6002 
6003 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6004 
6005 		if (size_left <= SZ_2M)
6006 			break;
6007 
6008 		pos += SZ_2M;
6009 		addr += SZ_2M;
6010 		size_left -= SZ_2M;
6011 	}
6012 
6013 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6014 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6015 	 * ASID
6016 	 */
6017 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6018 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6019 
6020 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6021 
6022 out:
6023 	hdev->asic_funcs->hw_queues_unlock(hdev);
6024 
6025 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6026 
6027 	return rc;
6028 }
6029 
6030 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6031 {
6032 	struct gaudi_device *gaudi = hdev->asic_specific;
6033 
6034 	if (hdev->reset_info.hard_reset_pending)
6035 		return U64_MAX;
6036 
6037 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6038 			(addr - gaudi->hbm_bar_cur_addr));
6039 }
6040 
6041 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6042 {
6043 	struct gaudi_device *gaudi = hdev->asic_specific;
6044 
6045 	if (hdev->reset_info.hard_reset_pending)
6046 		return;
6047 
6048 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6049 			(addr - gaudi->hbm_bar_cur_addr));
6050 }
6051 
6052 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6053 {
6054 	/* mask to zero the MMBP and ASID bits */
6055 	WREG32_AND(reg, ~0x7FF);
6056 	WREG32_OR(reg, asid);
6057 }
6058 
6059 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6060 {
6061 	struct gaudi_device *gaudi = hdev->asic_specific;
6062 
6063 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6064 		return;
6065 
6066 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6067 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6068 		return;
6069 	}
6070 
6071 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6072 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6073 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6074 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6075 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6076 
6077 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6078 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6079 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6080 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6081 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6082 
6083 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6084 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6085 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6086 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6087 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6088 
6089 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6090 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6091 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6092 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6093 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6094 
6095 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6096 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6097 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6098 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6099 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6100 
6101 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6102 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6103 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6104 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6105 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6106 
6107 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6108 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6109 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6110 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6111 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6112 
6113 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6114 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6115 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6116 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6117 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6118 
6119 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6120 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6121 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6122 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6123 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6124 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6125 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6126 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6127 
6128 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6129 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6130 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6131 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6132 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6133 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6134 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6135 
6136 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6137 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6138 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6139 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6140 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6141 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6142 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6143 
6144 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6145 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6146 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6147 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6149 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6151 
6152 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6153 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6159 
6160 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6161 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6167 
6168 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6175 
6176 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6177 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6183 
6184 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6185 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6191 
6192 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6197 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6198 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6202 
6203 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6206 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6207 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6213 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6214 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6215 
6216 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6217 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6218 				asid);
6219 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6220 				asid);
6221 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6222 				asid);
6223 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6224 				asid);
6225 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6226 				asid);
6227 	}
6228 
6229 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6230 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6231 				asid);
6232 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6233 				asid);
6234 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6235 				asid);
6236 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6237 				asid);
6238 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6239 				asid);
6240 	}
6241 
6242 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6243 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6244 				asid);
6245 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6246 				asid);
6247 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6248 				asid);
6249 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6250 				asid);
6251 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6252 				asid);
6253 	}
6254 
6255 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6256 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6257 				asid);
6258 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6259 				asid);
6260 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6261 				asid);
6262 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6263 				asid);
6264 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6265 				asid);
6266 	}
6267 
6268 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6269 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6270 				asid);
6271 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6272 				asid);
6273 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6274 				asid);
6275 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6276 				asid);
6277 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6278 				asid);
6279 	}
6280 
6281 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6282 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6283 				asid);
6284 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6285 				asid);
6286 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6287 				asid);
6288 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6289 				asid);
6290 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6291 				asid);
6292 	}
6293 
6294 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6295 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6296 				asid);
6297 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6298 				asid);
6299 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6300 				asid);
6301 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6302 				asid);
6303 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6304 				asid);
6305 	}
6306 
6307 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6308 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6309 				asid);
6310 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6311 				asid);
6312 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6313 				asid);
6314 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6315 				asid);
6316 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6317 				asid);
6318 	}
6319 
6320 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6321 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6322 				asid);
6323 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6324 				asid);
6325 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6326 				asid);
6327 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6328 				asid);
6329 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6330 				asid);
6331 	}
6332 
6333 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6334 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6335 				asid);
6336 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6337 				asid);
6338 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6339 				asid);
6340 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6341 				asid);
6342 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6343 				asid);
6344 	}
6345 
6346 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6347 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6348 }
6349 
6350 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6351 		struct hl_cs_job *job)
6352 {
6353 	struct packet_msg_prot *fence_pkt;
6354 	u32 *fence_ptr;
6355 	dma_addr_t fence_dma_addr;
6356 	struct hl_cb *cb;
6357 	u32 tmp, timeout, dma_offset;
6358 	int rc;
6359 
6360 	if (hdev->pldm)
6361 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6362 	else
6363 		timeout = HL_DEVICE_TIMEOUT_USEC;
6364 
6365 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6366 	if (!fence_ptr) {
6367 		dev_err(hdev->dev,
6368 			"Failed to allocate fence memory for QMAN0\n");
6369 		return -ENOMEM;
6370 	}
6371 
6372 	cb = job->patched_cb;
6373 
6374 	fence_pkt = cb->kernel_address +
6375 			job->job_cb_size - sizeof(struct packet_msg_prot);
6376 
6377 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6378 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6379 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6380 
6381 	fence_pkt->ctl = cpu_to_le32(tmp);
6382 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6383 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6384 
6385 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6386 
6387 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6388 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6389 
6390 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6391 					job->job_cb_size, cb->bus_address);
6392 	if (rc) {
6393 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6394 		goto free_fence_ptr;
6395 	}
6396 
6397 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6398 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6399 				timeout, true);
6400 
6401 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6402 
6403 	if (rc == -ETIMEDOUT) {
6404 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6405 		goto free_fence_ptr;
6406 	}
6407 
6408 free_fence_ptr:
6409 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6410 
6411 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6412 	return rc;
6413 }
6414 
6415 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6416 {
6417 	if (event_type >= GAUDI_EVENT_SIZE)
6418 		goto event_not_supported;
6419 
6420 	if (!gaudi_irq_map_table[event_type].valid)
6421 		goto event_not_supported;
6422 
6423 	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6424 
6425 	return;
6426 
6427 event_not_supported:
6428 	snprintf(desc, size, "N/A");
6429 }
6430 
6431 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6432 							bool is_write, u16 *engine_id_1,
6433 							u16 *engine_id_2)
6434 {
6435 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6436 
6437 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6438 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6439 
6440 	switch (x_y) {
6441 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6442 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6443 		dma_id[0] = 0;
6444 		dma_id[1] = 2;
6445 		break;
6446 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6447 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6448 		dma_id[0] = 1;
6449 		dma_id[1] = 3;
6450 		break;
6451 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6452 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6453 		dma_id[0] = 4;
6454 		dma_id[1] = 6;
6455 		break;
6456 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6457 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6458 		dma_id[0] = 5;
6459 		dma_id[1] = 7;
6460 		break;
6461 	default:
6462 		goto unknown_initiator;
6463 	}
6464 
6465 	for (i = 0 ; i < 2 ; i++) {
6466 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6467 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6468 	}
6469 
6470 	switch (x_y) {
6471 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6472 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6473 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6474 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6475 			return "DMA0";
6476 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6477 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6478 			return "DMA2";
6479 		} else {
6480 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6481 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6482 			return "DMA0 or DMA2";
6483 		}
6484 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6485 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6486 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6487 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6488 			return "DMA1";
6489 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6490 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6491 			return "DMA3";
6492 		} else {
6493 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6494 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6495 			return "DMA1 or DMA3";
6496 		}
6497 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6498 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6499 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6500 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6501 			return "DMA4";
6502 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6503 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6504 			return "DMA6";
6505 		} else {
6506 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6507 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6508 			return "DMA4 or DMA6";
6509 		}
6510 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6511 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6512 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6513 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6514 			return "DMA5";
6515 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6516 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6517 			return "DMA7";
6518 		} else {
6519 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6520 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6521 			return "DMA5 or DMA7";
6522 		}
6523 	}
6524 
6525 unknown_initiator:
6526 	return "unknown initiator";
6527 }
6528 
6529 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6530 							u16 *engine_id_1, u16 *engine_id_2)
6531 {
6532 	u32 val, x_y, axi_id;
6533 
6534 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6535 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6536 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6537 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6538 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6539 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6540 
6541 	switch (x_y) {
6542 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6543 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6544 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6545 			return "TPC0";
6546 		}
6547 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6548 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6549 			return "NIC0";
6550 		}
6551 		break;
6552 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6553 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6554 		return "TPC1";
6555 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6556 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6557 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6558 		return "MME0";
6559 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6560 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6561 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6562 		return "MME1";
6563 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6564 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6565 		return "TPC2";
6566 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6567 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6568 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6569 			return "TPC3";
6570 		}
6571 		/* PCI, CPU or PSOC does not have engine id*/
6572 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6573 			return "PCI";
6574 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6575 			return "CPU";
6576 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6577 			return "PSOC";
6578 		break;
6579 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6580 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6581 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6582 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6583 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6584 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6585 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6586 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6587 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6588 				engine_id_1, engine_id_2);
6589 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6590 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6591 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6592 			return "TPC4";
6593 		}
6594 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6595 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6596 			return "NIC1";
6597 		}
6598 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6599 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6600 			return "NIC2";
6601 		}
6602 		break;
6603 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6604 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6605 		return "TPC5";
6606 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6607 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6608 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6609 		return "MME2";
6610 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6611 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6612 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6613 		return "MME3";
6614 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6615 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6616 		return "TPC6";
6617 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6618 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6619 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6620 			return "TPC7";
6621 		}
6622 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6623 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6624 			return "NIC4";
6625 		}
6626 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6627 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6628 			return "NIC5";
6629 		}
6630 		break;
6631 	default:
6632 		break;
6633 	}
6634 
6635 	dev_err(hdev->dev,
6636 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6637 		val,
6638 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6639 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6640 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6641 			RAZWI_INITIATOR_AXI_ID_MASK);
6642 
6643 	return "unknown initiator";
6644 }
6645 
6646 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6647 						u16 *engine_id_2, bool *is_read, bool *is_write)
6648 {
6649 
6650 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6651 		dev_err_ratelimited(hdev->dev,
6652 			"RAZWI event caused by illegal write of %s\n",
6653 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6654 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6655 		*is_write = true;
6656 	}
6657 
6658 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6659 		dev_err_ratelimited(hdev->dev,
6660 			"RAZWI event caused by illegal read of %s\n",
6661 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6662 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6663 		*is_read = true;
6664 	}
6665 }
6666 
6667 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6668 {
6669 	struct gaudi_device *gaudi = hdev->asic_specific;
6670 	u32 val;
6671 
6672 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6673 		return;
6674 
6675 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6676 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6677 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6678 		*addr <<= 32;
6679 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6680 
6681 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6682 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6683 
6684 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6685 	}
6686 
6687 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6688 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6689 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6690 		*addr <<= 32;
6691 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6692 
6693 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6694 
6695 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6696 	}
6697 }
6698 
6699 /*
6700  *  +-------------------+------------------------------------------------------+
6701  *  | Configuration Reg |                     Description                      |
6702  *  |      Address      |                                                      |
6703  *  +-------------------+------------------------------------------------------+
6704  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6705  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6706  *  |                   |0xF34 memory wrappers 63:32                           |
6707  *  |                   |0xF38 memory wrappers 95:64                           |
6708  *  |                   |0xF3C memory wrappers 127:96                          |
6709  *  +-------------------+------------------------------------------------------+
6710  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6711  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6712  *  |                   |0xF44 memory wrappers 63:32                           |
6713  *  |                   |0xF48 memory wrappers 95:64                           |
6714  *  |                   |0xF4C memory wrappers 127:96                          |
6715  *  +-------------------+------------------------------------------------------+
6716  */
6717 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6718 		struct ecc_info_extract_params *params, u64 *ecc_address,
6719 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6720 {
6721 	u32 i, num_mem_regs, reg, err_bit;
6722 	u64 err_addr, err_word = 0;
6723 
6724 	num_mem_regs = params->num_memories / 32 +
6725 			((params->num_memories % 32) ? 1 : 0);
6726 
6727 	if (params->block_address >= CFG_BASE)
6728 		params->block_address -= CFG_BASE;
6729 
6730 	if (params->derr)
6731 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6732 	else
6733 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6734 
6735 	/* Set invalid wrapper index */
6736 	*memory_wrapper_idx = 0xFF;
6737 
6738 	/* Iterate through memory wrappers, a single bit must be set */
6739 	for (i = 0 ; i < num_mem_regs ; i++) {
6740 		err_addr += i * 4;
6741 		err_word = RREG32(err_addr);
6742 		if (err_word) {
6743 			err_bit = __ffs(err_word);
6744 			*memory_wrapper_idx = err_bit + (32 * i);
6745 			break;
6746 		}
6747 	}
6748 
6749 	if (*memory_wrapper_idx == 0xFF) {
6750 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6751 		return -EINVAL;
6752 	}
6753 
6754 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6755 			*memory_wrapper_idx);
6756 
6757 	*ecc_address =
6758 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6759 	*ecc_syndrom =
6760 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6761 
6762 	/* Clear error indication */
6763 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6764 	if (params->derr)
6765 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6766 	else
6767 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6768 
6769 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6770 
6771 	return 0;
6772 }
6773 
/*
 * gaudi_queue_idx_dec - step a queue index (pi/ci) one entry backwards
 *
 * @idx: the current pi/ci value
 * @q_len: the queue length (power of 2)
 *
 * @return the previous index, wrapping from 0 to the last entry
 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	/*
	 * Going back by one modulo the queue length is the same as going
	 * forward by (length - 1). Masking with (length - 1) then folds the
	 * result back into [0, length - 1].
	 */
	u32 step_back = q_len - 1;

	return (idx + step_back) & step_back;
}
6793 
6794 /**
6795  * gaudi_handle_sw_config_stream_data - print SW config stream data
6796  *
6797  * @hdev: pointer to the habanalabs device structure
6798  * @stream: the QMAN's stream
6799  * @qman_base: base address of QMAN registers block
6800  * @event_mask: mask of the last events occurred
6801  */
6802 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6803 						u64 qman_base, u64 event_mask)
6804 {
6805 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6806 	u32 cq_ptr_lo_off, size;
6807 
6808 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6809 
6810 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6811 						stream * cq_ptr_lo_off;
6812 	cq_ptr_hi = cq_ptr_lo +
6813 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6814 	cq_tsize = cq_ptr_lo +
6815 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6816 
6817 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6818 	size = RREG32(cq_tsize);
6819 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6820 							stream, cq_ptr, size);
6821 
6822 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6823 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6824 		hdev->captured_err_info.undef_opcode.cq_size = size;
6825 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6826 	}
6827 }
6828 
6829 /**
6830  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6831  *
6832  * @hdev: pointer to the habanalabs device structure
6833  * @qid_base: first QID of the QMAN (out of 4 streams)
6834  * @stream: the QMAN's stream
6835  * @qman_base: base address of QMAN registers block
6836  * @event_mask: mask of the last events occurred
6837  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6838  */
6839 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6840 						u32 stream, u64 qman_base,
6841 						u64 event_mask,
6842 						bool pr_sw_conf)
6843 {
6844 	u32 ci, qm_ci_stream_off, queue_len;
6845 	struct hl_hw_queue *q;
6846 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6847 	int i;
6848 
6849 	q = &hdev->kernel_queues[qid_base + stream];
6850 
6851 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6852 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6853 						stream * qm_ci_stream_off;
6854 
6855 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6856 					q->int_queue_len : HL_QUEUE_LENGTH;
6857 
6858 	hdev->asic_funcs->hw_queues_lock(hdev);
6859 
6860 	if (pr_sw_conf)
6861 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6862 
6863 	ci = RREG32(pq_ci);
6864 
6865 	/* we should start printing form ci -1 */
6866 	ci = gaudi_queue_idx_dec(ci, queue_len);
6867 	memset(addr, 0, sizeof(addr));
6868 
6869 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6870 		struct hl_bd *bd;
6871 		u32 len;
6872 
6873 		bd = q->kernel_address;
6874 		bd += ci;
6875 
6876 		len = le32_to_cpu(bd->len);
6877 		/* len 0 means uninitialized entry- break */
6878 		if (!len)
6879 			break;
6880 
6881 		addr[i] = le64_to_cpu(bd->ptr);
6882 
6883 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6884 							stream, ci, addr[i], len);
6885 
6886 		/* get previous ci, wrap if needed */
6887 		ci = gaudi_queue_idx_dec(ci, queue_len);
6888 	}
6889 
6890 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6891 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6892 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6893 
6894 		if (arr_idx == 0) {
6895 			undef_opcode->timestamp = ktime_get();
6896 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6897 		}
6898 
6899 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6900 		undef_opcode->cb_addr_streams_len++;
6901 	}
6902 
6903 	hdev->asic_funcs->hw_queues_unlock(hdev);
6904 }
6905 
6906 /**
6907  * handle_qman_data_on_err - extract QMAN data on error
6908  *
6909  * @hdev: pointer to the habanalabs device structure
6910  * @qid_base: first QID of the QMAN (out of 4 streams)
6911  * @stream: the QMAN's stream
6912  * @qman_base: base address of QMAN registers block
6913  * @event_mask: mask of the last events occurred
6914  *
6915  * This function attempt to exatract as much data as possible on QMAN error.
6916  * On upper CP print the SW config stream data and last 8 PQEs.
6917  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6918  */
6919 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6920 				   u32 stream, u64 qman_base, u64 event_mask)
6921 {
6922 	u32 i;
6923 
6924 	if (stream != QMAN_STREAMS) {
6925 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6926 			qman_base, event_mask, true);
6927 		return;
6928 	}
6929 
6930 	/* handle Lower-CP */
6931 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6932 
6933 	for (i = 0; i < QMAN_STREAMS; i++)
6934 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6935 			qman_base, event_mask, false);
6936 }
6937 
6938 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6939 					  const char *qm_name,
6940 					  u64 qman_base,
6941 					  u32 qid_base,
6942 					  u64 *event_mask)
6943 {
6944 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6945 	u64 glbl_sts_addr, arb_err_addr;
6946 	char reg_desc[32];
6947 
6948 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6949 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6950 
6951 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6952 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6953 		glbl_sts_clr_val = 0;
6954 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6955 
6956 		if (!glbl_sts_val)
6957 			continue;
6958 
6959 		if (i == QMAN_STREAMS)
6960 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6961 		else
6962 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6963 
6964 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6965 			if (glbl_sts_val & BIT(j)) {
6966 				dev_err_ratelimited(hdev->dev,
6967 						"%s %s. err cause: %s\n",
6968 						qm_name, reg_desc,
6969 						gaudi_qman_error_cause[j]);
6970 				glbl_sts_clr_val |= BIT(j);
6971 			}
6972 		}
6973 		/* check for undefined opcode */
6974 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6975 				hdev->captured_err_info.undef_opcode.write_enable) {
6976 			memset(&hdev->captured_err_info.undef_opcode, 0,
6977 						sizeof(hdev->captured_err_info.undef_opcode));
6978 
6979 			hdev->captured_err_info.undef_opcode.write_enable = false;
6980 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6981 		}
6982 
6983 		/* Write 1 clear errors */
6984 		if (!hdev->stop_on_err)
6985 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6986 		else
6987 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6988 	}
6989 
6990 	arb_err_val = RREG32(arb_err_addr);
6991 
6992 	if (!arb_err_val)
6993 		return;
6994 
6995 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6996 		if (arb_err_val & BIT(j)) {
6997 			dev_err_ratelimited(hdev->dev,
6998 					"%s ARB_ERR. err cause: %s\n",
6999 					qm_name,
7000 					gaudi_qman_arb_error_cause[j]);
7001 		}
7002 	}
7003 }
7004 
7005 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7006 		struct hl_eq_sm_sei_data *sei_data)
7007 {
7008 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7009 
7010 	/* Flip the bits as the enum is ordered in the opposite way */
7011 	index = (index ^ 0x3) & 0x3;
7012 
7013 	switch (sei_data->sei_cause) {
7014 	case SM_SEI_SO_OVERFLOW:
7015 		dev_err_ratelimited(hdev->dev,
7016 			"%s SEI Error: SOB Group %u overflow/underflow",
7017 			gaudi_sync_manager_names[index],
7018 			le32_to_cpu(sei_data->sei_log));
7019 		break;
7020 	case SM_SEI_LBW_4B_UNALIGNED:
7021 		dev_err_ratelimited(hdev->dev,
7022 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7023 			gaudi_sync_manager_names[index],
7024 			le32_to_cpu(sei_data->sei_log));
7025 		break;
7026 	case SM_SEI_AXI_RESPONSE_ERR:
7027 		dev_err_ratelimited(hdev->dev,
7028 			"%s SEI Error: AXI ID %u response error",
7029 			gaudi_sync_manager_names[index],
7030 			le32_to_cpu(sei_data->sei_log));
7031 		break;
7032 	default:
7033 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7034 				le32_to_cpu(sei_data->sei_log));
7035 		break;
7036 	}
7037 }
7038 
7039 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7040 		struct hl_eq_ecc_data *ecc_data)
7041 {
7042 	struct ecc_info_extract_params params;
7043 	u64 ecc_address = 0, ecc_syndrom = 0;
7044 	u8 index, memory_wrapper_idx = 0;
7045 	bool extract_info_from_fw;
7046 	int rc;
7047 
7048 	if (hdev->asic_prop.fw_security_enabled) {
7049 		extract_info_from_fw = true;
7050 		goto extract_ecc_info;
7051 	}
7052 
7053 	switch (event_type) {
7054 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7055 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7056 		extract_info_from_fw = true;
7057 		break;
7058 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7059 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7060 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7061 		params.num_memories = 90;
7062 		params.derr = false;
7063 		extract_info_from_fw = false;
7064 		break;
7065 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7066 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7067 		params.block_address =
7068 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7069 		params.num_memories = 90;
7070 		params.derr = true;
7071 		extract_info_from_fw = false;
7072 		break;
7073 	case GAUDI_EVENT_MME0_ACC_SERR:
7074 	case GAUDI_EVENT_MME1_ACC_SERR:
7075 	case GAUDI_EVENT_MME2_ACC_SERR:
7076 	case GAUDI_EVENT_MME3_ACC_SERR:
7077 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7078 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7079 		params.num_memories = 128;
7080 		params.derr = false;
7081 		extract_info_from_fw = false;
7082 		break;
7083 	case GAUDI_EVENT_MME0_ACC_DERR:
7084 	case GAUDI_EVENT_MME1_ACC_DERR:
7085 	case GAUDI_EVENT_MME2_ACC_DERR:
7086 	case GAUDI_EVENT_MME3_ACC_DERR:
7087 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7088 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7089 		params.num_memories = 128;
7090 		params.derr = true;
7091 		extract_info_from_fw = false;
7092 		break;
7093 	case GAUDI_EVENT_MME0_SBAB_SERR:
7094 	case GAUDI_EVENT_MME1_SBAB_SERR:
7095 	case GAUDI_EVENT_MME2_SBAB_SERR:
7096 	case GAUDI_EVENT_MME3_SBAB_SERR:
7097 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7098 		params.block_address =
7099 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7100 		params.num_memories = 33;
7101 		params.derr = false;
7102 		extract_info_from_fw = false;
7103 		break;
7104 	case GAUDI_EVENT_MME0_SBAB_DERR:
7105 	case GAUDI_EVENT_MME1_SBAB_DERR:
7106 	case GAUDI_EVENT_MME2_SBAB_DERR:
7107 	case GAUDI_EVENT_MME3_SBAB_DERR:
7108 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7109 		params.block_address =
7110 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7111 		params.num_memories = 33;
7112 		params.derr = true;
7113 		extract_info_from_fw = false;
7114 		break;
7115 	default:
7116 		return;
7117 	}
7118 
7119 extract_ecc_info:
7120 	if (extract_info_from_fw) {
7121 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7122 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7123 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7124 	} else {
7125 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7126 				&ecc_syndrom, &memory_wrapper_idx);
7127 		if (rc)
7128 			return;
7129 	}
7130 
7131 	dev_err(hdev->dev,
7132 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7133 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7134 }
7135 
7136 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7137 {
7138 	u64 qman_base;
7139 	char desc[32];
7140 	u32 qid_base;
7141 	u8 index;
7142 
7143 	switch (event_type) {
7144 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7145 		index = event_type - GAUDI_EVENT_TPC0_QM;
7146 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7147 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7148 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7149 		break;
7150 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7151 		if (event_type == GAUDI_EVENT_MME0_QM) {
7152 			index = 0;
7153 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7154 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7155 			index = 2;
7156 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7157 		}
7158 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7159 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7160 		break;
7161 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7162 		index = event_type - GAUDI_EVENT_DMA0_QM;
7163 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7164 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7165 		if (index > 1)
7166 			qid_base++;
7167 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7168 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7169 		break;
7170 	case GAUDI_EVENT_NIC0_QM0:
7171 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7172 		qman_base = mmNIC0_QM0_BASE;
7173 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7174 		break;
7175 	case GAUDI_EVENT_NIC0_QM1:
7176 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7177 		qman_base = mmNIC0_QM1_BASE;
7178 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7179 		break;
7180 	case GAUDI_EVENT_NIC1_QM0:
7181 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7182 		qman_base = mmNIC1_QM0_BASE;
7183 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7184 		break;
7185 	case GAUDI_EVENT_NIC1_QM1:
7186 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7187 		qman_base = mmNIC1_QM1_BASE;
7188 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7189 		break;
7190 	case GAUDI_EVENT_NIC2_QM0:
7191 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7192 		qman_base = mmNIC2_QM0_BASE;
7193 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7194 		break;
7195 	case GAUDI_EVENT_NIC2_QM1:
7196 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7197 		qman_base = mmNIC2_QM1_BASE;
7198 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7199 		break;
7200 	case GAUDI_EVENT_NIC3_QM0:
7201 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7202 		qman_base = mmNIC3_QM0_BASE;
7203 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7204 		break;
7205 	case GAUDI_EVENT_NIC3_QM1:
7206 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7207 		qman_base = mmNIC3_QM1_BASE;
7208 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7209 		break;
7210 	case GAUDI_EVENT_NIC4_QM0:
7211 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7212 		qman_base = mmNIC4_QM0_BASE;
7213 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7214 		break;
7215 	case GAUDI_EVENT_NIC4_QM1:
7216 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7217 		qman_base = mmNIC4_QM1_BASE;
7218 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7219 		break;
7220 	default:
7221 		return;
7222 	}
7223 
7224 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7225 }
7226 
7227 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7228 					bool check_razwi, u64 *event_mask)
7229 {
7230 	bool is_read = false, is_write = false;
7231 	u16 engine_id[2], num_of_razwi_eng = 0;
7232 	char desc[64] = "";
7233 	u64 razwi_addr = 0;
7234 	u8 razwi_flags = 0;
7235 
7236 	/*
7237 	 * Init engine id by default as not valid and only if razwi initiated from engine with
7238 	 * engine id it will get valid value.
7239 	 */
7240 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7241 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7242 
7243 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7244 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7245 		event_type, desc);
7246 
7247 	if (check_razwi) {
7248 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7249 						&is_write);
7250 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7251 
7252 		if (is_read)
7253 			razwi_flags |= HL_RAZWI_READ;
7254 		if (is_write)
7255 			razwi_flags |= HL_RAZWI_WRITE;
7256 
7257 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7258 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7259 				num_of_razwi_eng = 2;
7260 			else
7261 				num_of_razwi_eng = 1;
7262 		}
7263 
7264 		if (razwi_flags)
7265 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7266 					razwi_flags, event_mask);
7267 	}
7268 }
7269 
7270 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7271 					struct cpucp_pkt_sync_err *sync_err)
7272 {
7273 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7274 
7275 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7276 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7277 }
7278 
7279 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7280 					struct hl_eq_fw_alive *fw_alive)
7281 {
7282 	dev_err(hdev->dev,
7283 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7284 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7285 		le32_to_cpu(fw_alive->process_id),
7286 		le32_to_cpu(fw_alive->thread_id),
7287 		le64_to_cpu(fw_alive->uptime_seconds));
7288 }
7289 
7290 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7291 						void *data)
7292 {
7293 	char desc[64] = "", *type;
7294 	struct eq_nic_sei_event *eq_nic_sei = data;
7295 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7296 
7297 	switch (eq_nic_sei->axi_error_cause) {
7298 	case RXB:
7299 		type = "RXB";
7300 		break;
7301 	case RXE:
7302 		type = "RXE";
7303 		break;
7304 	case TXS:
7305 		type = "TXS";
7306 		break;
7307 	case TXE:
7308 		type = "TXE";
7309 		break;
7310 	case QPC_RESP:
7311 		type = "QPC_RESP";
7312 		break;
7313 	case NON_AXI_ERR:
7314 		type = "NON_AXI_ERR";
7315 		break;
7316 	case TMR:
7317 		type = "TMR";
7318 		break;
7319 	default:
7320 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7321 			eq_nic_sei->axi_error_cause);
7322 		type = "N/A";
7323 		break;
7324 	}
7325 
7326 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7327 			eq_nic_sei->id);
7328 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7329 		event_type, desc);
7330 }
7331 
7332 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7333 {
7334 	/* GAUDI doesn't support any reset except hard-reset */
7335 	return -EPERM;
7336 }
7337 
7338 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7339 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7340 {
7341 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7342 	int rc = 0;
7343 
7344 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7345 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7346 		if (!hbm_ecc_data) {
7347 			dev_err(hdev->dev, "No FW ECC data");
7348 			return 0;
7349 		}
7350 
7351 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7352 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7354 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7356 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7358 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7359 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7360 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7361 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7362 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7363 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7364 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7365 
7366 		dev_err(hdev->dev,
7367 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7368 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7369 		dev_err(hdev->dev,
7370 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7371 			device, ch, hbm_ecc_data->first_addr, type,
7372 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7373 			hbm_ecc_data->dec_cnt);
7374 		return 0;
7375 	}
7376 
7377 	if (hdev->asic_prop.fw_security_enabled) {
7378 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7379 		return 0;
7380 	}
7381 
7382 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7383 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7384 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7385 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7386 		if (val) {
7387 			rc = -EIO;
7388 			dev_err(hdev->dev,
7389 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7390 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7391 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7392 				(val >> 4) & 0x1);
7393 
7394 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7395 			dev_err(hdev->dev,
7396 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7397 				device, ch * 2,
7398 				RREG32(base + ch * 0x1000 + 0x064),
7399 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7400 				(val2 & 0xFF0000) >> 16,
7401 				(val2 & 0xFF000000) >> 24);
7402 		}
7403 
7404 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7405 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7406 		if (val) {
7407 			rc = -EIO;
7408 			dev_err(hdev->dev,
7409 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7410 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7411 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7412 				(val >> 4) & 0x1);
7413 
7414 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7415 			dev_err(hdev->dev,
7416 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7417 				device, ch * 2 + 1,
7418 				RREG32(base + ch * 0x1000 + 0x074),
7419 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7420 				(val2 & 0xFF0000) >> 16,
7421 				(val2 & 0xFF000000) >> 24);
7422 		}
7423 
7424 		/* Clear interrupts */
7425 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7426 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7427 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7428 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7429 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7430 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7431 	}
7432 
7433 	val  = RREG32(base + 0x8F30);
7434 	val2 = RREG32(base + 0x8F34);
7435 	if (val | val2) {
7436 		rc = -EIO;
7437 		dev_err(hdev->dev,
7438 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7439 			device, val, val2);
7440 	}
7441 	val  = RREG32(base + 0x8F40);
7442 	val2 = RREG32(base + 0x8F44);
7443 	if (val | val2) {
7444 		rc = -EIO;
7445 		dev_err(hdev->dev,
7446 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7447 			device, val, val2);
7448 	}
7449 
7450 	return rc;
7451 }
7452 
7453 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7454 {
7455 	switch (hbm_event_type) {
7456 	case GAUDI_EVENT_HBM0_SPI_0:
7457 	case GAUDI_EVENT_HBM0_SPI_1:
7458 		return 0;
7459 	case GAUDI_EVENT_HBM1_SPI_0:
7460 	case GAUDI_EVENT_HBM1_SPI_1:
7461 		return 1;
7462 	case GAUDI_EVENT_HBM2_SPI_0:
7463 	case GAUDI_EVENT_HBM2_SPI_1:
7464 		return 2;
7465 	case GAUDI_EVENT_HBM3_SPI_0:
7466 	case GAUDI_EVENT_HBM3_SPI_1:
7467 		return 3;
7468 	default:
7469 		break;
7470 	}
7471 
7472 	/* Should never happen */
7473 	return 0;
7474 }
7475 
7476 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7477 					char *interrupt_name)
7478 {
7479 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7480 	bool soft_reset_required = false;
7481 
7482 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7483 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7484 
7485 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7486 		if (tpc_interrupts_cause & BIT(i)) {
7487 			dev_err_ratelimited(hdev->dev,
7488 					"TPC%d_%s interrupt cause: %s\n",
7489 					tpc_id, interrupt_name,
7490 					gaudi_tpc_interrupts_cause[i]);
7491 			/* If this is QM error, we need to soft-reset */
7492 			if (i == 15)
7493 				soft_reset_required = true;
7494 		}
7495 
7496 	/* Clear interrupts */
7497 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7498 
7499 	return soft_reset_required;
7500 }
7501 
7502 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7503 {
7504 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7505 }
7506 
7507 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7508 {
7509 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7510 }
7511 
7512 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7513 {
7514 	ktime_t zero_time = ktime_set(0, 0);
7515 
7516 	mutex_lock(&hdev->clk_throttling.lock);
7517 
7518 	switch (event_type) {
7519 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7520 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7521 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7522 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7523 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7524 		dev_info_ratelimited(hdev->dev,
7525 			"Clock throttling due to power consumption\n");
7526 		break;
7527 
7528 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7529 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7530 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7531 		dev_info_ratelimited(hdev->dev,
7532 			"Power envelop is safe, back to optimal clock\n");
7533 		break;
7534 
7535 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7536 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7537 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7538 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7539 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7540 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7541 		dev_info_ratelimited(hdev->dev,
7542 			"Clock throttling due to overheating\n");
7543 		break;
7544 
7545 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7546 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7547 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7548 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7549 		dev_info_ratelimited(hdev->dev,
7550 			"Thermal envelop is safe, back to optimal clock\n");
7551 		break;
7552 
7553 	default:
7554 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7555 			event_type);
7556 		break;
7557 	}
7558 
7559 	mutex_unlock(&hdev->clk_throttling.lock);
7560 }
7561 
7562 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7563 {
7564 	struct gaudi_device *gaudi = hdev->asic_specific;
7565 	struct hl_info_fw_err_info fw_err_info;
7566 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7567 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7568 	u32 fw_fatal_err_flag = 0, flags = 0;
7569 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7570 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7571 	bool reset_required, reset_direct = false;
7572 	u8 cause;
7573 	int rc;
7574 
7575 	if (event_type >= GAUDI_EVENT_SIZE) {
7576 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7577 				event_type, GAUDI_EVENT_SIZE - 1);
7578 		return;
7579 	}
7580 
7581 	gaudi->events_stat[event_type]++;
7582 	gaudi->events_stat_aggregate[event_type]++;
7583 
7584 	switch (event_type) {
7585 	case GAUDI_EVENT_PCIE_CORE_DERR:
7586 	case GAUDI_EVENT_PCIE_IF_DERR:
7587 	case GAUDI_EVENT_PCIE_PHY_DERR:
7588 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7589 	case GAUDI_EVENT_MME0_ACC_DERR:
7590 	case GAUDI_EVENT_MME0_SBAB_DERR:
7591 	case GAUDI_EVENT_MME1_ACC_DERR:
7592 	case GAUDI_EVENT_MME1_SBAB_DERR:
7593 	case GAUDI_EVENT_MME2_ACC_DERR:
7594 	case GAUDI_EVENT_MME2_SBAB_DERR:
7595 	case GAUDI_EVENT_MME3_ACC_DERR:
7596 	case GAUDI_EVENT_MME3_SBAB_DERR:
7597 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7598 		fallthrough;
7599 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7600 	case GAUDI_EVENT_PSOC_MEM_DERR:
7601 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7602 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7603 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7604 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7605 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7606 	case GAUDI_EVENT_MMU_DERR:
7607 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7608 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7609 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7610 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7611 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7612 		goto reset_device;
7613 
7614 	case GAUDI_EVENT_GIC500:
7615 	case GAUDI_EVENT_AXI_ECC:
7616 	case GAUDI_EVENT_L2_RAM_ECC:
7617 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7618 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7619 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7620 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7621 		goto reset_device;
7622 
7623 	case GAUDI_EVENT_HBM0_SPI_0:
7624 	case GAUDI_EVENT_HBM1_SPI_0:
7625 	case GAUDI_EVENT_HBM2_SPI_0:
7626 	case GAUDI_EVENT_HBM3_SPI_0:
7627 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7628 		gaudi_hbm_read_interrupts(hdev,
7629 				gaudi_hbm_event_to_dev(event_type),
7630 				&eq_entry->hbm_ecc_data);
7631 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7632 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7633 		goto reset_device;
7634 
7635 	case GAUDI_EVENT_HBM0_SPI_1:
7636 	case GAUDI_EVENT_HBM1_SPI_1:
7637 	case GAUDI_EVENT_HBM2_SPI_1:
7638 	case GAUDI_EVENT_HBM3_SPI_1:
7639 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7640 		gaudi_hbm_read_interrupts(hdev,
7641 				gaudi_hbm_event_to_dev(event_type),
7642 				&eq_entry->hbm_ecc_data);
7643 		hl_fw_unmask_irq(hdev, event_type);
7644 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7645 		break;
7646 
7647 	case GAUDI_EVENT_TPC0_DEC:
7648 	case GAUDI_EVENT_TPC1_DEC:
7649 	case GAUDI_EVENT_TPC2_DEC:
7650 	case GAUDI_EVENT_TPC3_DEC:
7651 	case GAUDI_EVENT_TPC4_DEC:
7652 	case GAUDI_EVENT_TPC5_DEC:
7653 	case GAUDI_EVENT_TPC6_DEC:
7654 	case GAUDI_EVENT_TPC7_DEC:
7655 		/* In TPC DEC event, notify on TPC assertion. While there isn't
7656 		 * a specific event for assertion yet, the FW generates TPC DEC event.
7657 		 * The SW upper layer will inspect an internal mapped area to indicate
7658 		 * if the event is a TPC Assertion or a "real" TPC DEC.
7659 		 */
7660 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7661 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7662 		reset_required = gaudi_tpc_read_interrupts(hdev,
7663 					tpc_dec_event_to_tpc_id(event_type),
7664 					"AXI_SLV_DEC_Error");
7665 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7666 		if (reset_required) {
7667 			dev_err(hdev->dev, "reset required due to %s\n",
7668 				gaudi_irq_map_table[event_type].name);
7669 
7670 			reset_direct = true;
7671 			goto reset_device;
7672 		} else {
7673 			hl_fw_unmask_irq(hdev, event_type);
7674 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7675 		}
7676 		break;
7677 
7678 	case GAUDI_EVENT_TPC0_KRN_ERR:
7679 	case GAUDI_EVENT_TPC1_KRN_ERR:
7680 	case GAUDI_EVENT_TPC2_KRN_ERR:
7681 	case GAUDI_EVENT_TPC3_KRN_ERR:
7682 	case GAUDI_EVENT_TPC4_KRN_ERR:
7683 	case GAUDI_EVENT_TPC5_KRN_ERR:
7684 	case GAUDI_EVENT_TPC6_KRN_ERR:
7685 	case GAUDI_EVENT_TPC7_KRN_ERR:
7686 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7687 		reset_required = gaudi_tpc_read_interrupts(hdev,
7688 					tpc_krn_event_to_tpc_id(event_type),
7689 					"KRN_ERR");
7690 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7691 		if (reset_required) {
7692 			dev_err(hdev->dev, "reset required due to %s\n",
7693 				gaudi_irq_map_table[event_type].name);
7694 
7695 			reset_direct = true;
7696 			goto reset_device;
7697 		} else {
7698 			hl_fw_unmask_irq(hdev, event_type);
7699 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7700 		}
7701 		break;
7702 
7703 	case GAUDI_EVENT_PCIE_CORE_SERR:
7704 	case GAUDI_EVENT_PCIE_IF_SERR:
7705 	case GAUDI_EVENT_PCIE_PHY_SERR:
7706 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7707 	case GAUDI_EVENT_MME0_ACC_SERR:
7708 	case GAUDI_EVENT_MME0_SBAB_SERR:
7709 	case GAUDI_EVENT_MME1_ACC_SERR:
7710 	case GAUDI_EVENT_MME1_SBAB_SERR:
7711 	case GAUDI_EVENT_MME2_ACC_SERR:
7712 	case GAUDI_EVENT_MME2_SBAB_SERR:
7713 	case GAUDI_EVENT_MME3_ACC_SERR:
7714 	case GAUDI_EVENT_MME3_SBAB_SERR:
7715 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7716 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7717 	case GAUDI_EVENT_PSOC_MEM_SERR:
7718 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7719 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7720 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7721 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7722 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7723 		fallthrough;
7724 	case GAUDI_EVENT_MMU_SERR:
7725 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7726 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7727 		hl_fw_unmask_irq(hdev, event_type);
7728 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7729 		break;
7730 
7731 	case GAUDI_EVENT_PCIE_DEC:
7732 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7733 	case GAUDI_EVENT_PSOC_AXI_DEC:
7734 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7735 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7736 		hl_fw_unmask_irq(hdev, event_type);
7737 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7738 		break;
7739 
7740 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7741 	case GAUDI_EVENT_MMU_WR_PERM:
7742 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7743 		hl_fw_unmask_irq(hdev, event_type);
7744 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7745 		break;
7746 
7747 	case GAUDI_EVENT_MME0_WBC_RSP:
7748 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7749 	case GAUDI_EVENT_MME1_WBC_RSP:
7750 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7751 	case GAUDI_EVENT_MME2_WBC_RSP:
7752 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7753 	case GAUDI_EVENT_MME3_WBC_RSP:
7754 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7755 	case GAUDI_EVENT_RAZWI_OR_ADC:
7756 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7757 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7758 		fallthrough;
7759 	case GAUDI_EVENT_NIC0_QM0:
7760 	case GAUDI_EVENT_NIC0_QM1:
7761 	case GAUDI_EVENT_NIC1_QM0:
7762 	case GAUDI_EVENT_NIC1_QM1:
7763 	case GAUDI_EVENT_NIC2_QM0:
7764 	case GAUDI_EVENT_NIC2_QM1:
7765 	case GAUDI_EVENT_NIC3_QM0:
7766 	case GAUDI_EVENT_NIC3_QM1:
7767 	case GAUDI_EVENT_NIC4_QM0:
7768 	case GAUDI_EVENT_NIC4_QM1:
7769 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7770 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7771 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7772 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7773 		hl_fw_unmask_irq(hdev, event_type);
7774 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7775 		break;
7776 
7777 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7778 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7779 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7780 		goto reset_device;
7781 
7782 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7783 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7784 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7785 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7786 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7787 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7788 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7789 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7790 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7791 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7792 		hl_fw_unmask_irq(hdev, event_type);
7793 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7794 		break;
7795 
7796 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7797 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7798 		hl_fw_unmask_irq(hdev, event_type);
7799 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7800 		break;
7801 
7802 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7803 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7804 		gaudi_print_sm_sei_info(hdev, event_type,
7805 					&eq_entry->sm_sei_data);
7806 		rc = hl_state_dump(hdev);
7807 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7808 		if (rc)
7809 			dev_err(hdev->dev,
7810 				"Error during system state dump %d\n", rc);
7811 		hl_fw_unmask_irq(hdev, event_type);
7812 		break;
7813 
7814 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7815 		break;
7816 
7817 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7818 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7819 		hl_fw_unmask_irq(hdev, event_type);
7820 		break;
7821 
7822 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7823 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7824 		dev_err(hdev->dev,
7825 			"Received high temp H/W interrupt %d (cause %d)\n",
7826 			event_type, cause);
7827 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7828 		break;
7829 
7830 	case GAUDI_EVENT_DEV_RESET_REQ:
7831 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7832 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7833 		goto reset_device;
7834 
7835 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7836 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7837 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7838 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7839 		goto reset_device;
7840 
7841 	case GAUDI_EVENT_FW_ALIVE_S:
7842 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7843 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7844 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7845 		fw_err_info.event_id = event_type;
7846 		fw_err_info.event_mask = &event_mask;
7847 		hl_handle_fw_err(hdev, &fw_err_info);
7848 		goto reset_device;
7849 
7850 	default:
7851 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7852 				event_type);
7853 		break;
7854 	}
7855 
7856 	if (event_mask)
7857 		hl_notifier_event_send_all(hdev, event_mask);
7858 
7859 	return;
7860 
7861 reset_device:
7862 	reset_required = true;
7863 
7864 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7865 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7866 
7867 		/* notify on device unavailable while the reset triggered by fw */
7868 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7869 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7870 	} else if (hdev->hard_reset_on_fw_events) {
7871 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7872 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7873 	} else {
7874 		reset_required = false;
7875 	}
7876 
7877 	if (reset_required) {
7878 		/* escalate general hw errors to critical/fatal error */
7879 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7880 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7881 
7882 		hl_device_cond_reset(hdev, flags, event_mask);
7883 	} else {
7884 		hl_fw_unmask_irq(hdev, event_type);
7885 		/* Notification on occurred event needs to be sent although reset is not executed */
7886 		if (event_mask)
7887 			hl_notifier_event_send_all(hdev, event_mask);
7888 	}
7889 }
7890 
7891 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7892 {
7893 	struct gaudi_device *gaudi = hdev->asic_specific;
7894 
7895 	if (aggregate) {
7896 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7897 		return gaudi->events_stat_aggregate;
7898 	}
7899 
7900 	*size = (u32) sizeof(gaudi->events_stat);
7901 	return gaudi->events_stat;
7902 }
7903 
7904 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7905 {
7906 	struct gaudi_device *gaudi = hdev->asic_specific;
7907 	u32 status, timeout_usec;
7908 	int rc;
7909 
7910 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7911 		hdev->reset_info.hard_reset_pending)
7912 		return 0;
7913 
7914 	if (hdev->pldm)
7915 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7916 	else
7917 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7918 
7919 	/* L0 & L1 invalidation */
7920 	WREG32(mmSTLB_INV_PS, 3);
7921 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7922 	WREG32(mmSTLB_INV_PS, 2);
7923 
7924 	rc = hl_poll_timeout(
7925 		hdev,
7926 		mmSTLB_INV_PS,
7927 		status,
7928 		!status,
7929 		1000,
7930 		timeout_usec);
7931 
7932 	WREG32(mmSTLB_INV_SET, 0);
7933 
7934 	return rc;
7935 }
7936 
7937 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7938 						bool is_hard, u32 flags,
7939 						u32 asid, u64 va, u64 size)
7940 {
7941 	/* Treat as invalidate all because there is no range invalidation
7942 	 * in Gaudi
7943 	 */
7944 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7945 }
7946 
7947 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7948 {
7949 	u32 status, timeout_usec;
7950 	int rc;
7951 
7952 	if (hdev->pldm)
7953 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7954 	else
7955 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7956 
7957 	WREG32(MMU_ASID, asid);
7958 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7959 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7960 	WREG32(MMU_BUSY, 0x80000000);
7961 
7962 	rc = hl_poll_timeout(
7963 		hdev,
7964 		MMU_BUSY,
7965 		status,
7966 		!(status & 0x80000000),
7967 		1000,
7968 		timeout_usec);
7969 
7970 	if (rc) {
7971 		dev_err(hdev->dev,
7972 			"Timeout during MMU hop0 config of asid %d\n", asid);
7973 		return rc;
7974 	}
7975 
7976 	return 0;
7977 }
7978 
7979 static int gaudi_send_heartbeat(struct hl_device *hdev)
7980 {
7981 	struct gaudi_device *gaudi = hdev->asic_specific;
7982 
7983 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7984 		return 0;
7985 
7986 	return hl_fw_send_heartbeat(hdev);
7987 }
7988 
7989 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7990 {
7991 	struct gaudi_device *gaudi = hdev->asic_specific;
7992 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7993 	int rc;
7994 
7995 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7996 		return 0;
7997 
7998 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7999 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8000 					mmCPU_BOOT_ERR1);
8001 	if (rc)
8002 		return rc;
8003 
8004 	if (!strlen(prop->cpucp_info.card_name))
8005 		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8006 				CARD_NAME_MAX_LEN);
8007 
8008 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8009 
8010 	set_default_power_values(hdev);
8011 
8012 	return 0;
8013 }
8014 
8015 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8016 		struct engines_data *e)
8017 {
8018 	struct gaudi_device *gaudi = hdev->asic_specific;
8019 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8020 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8021 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8022 	unsigned long *mask = (unsigned long *)mask_arr;
8023 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8024 	bool is_idle = true, is_eng_idle, is_slave;
8025 	u64 offset;
8026 	int i, dma_id, port;
8027 
8028 	if (e)
8029 		hl_engine_data_sprintf(e,
8030 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8031 			"---  -------  ------------  ----------  -------------\n");
8032 
8033 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8034 		dma_id = gaudi_dma_assignment[i];
8035 		offset = dma_id * DMA_QMAN_OFFSET;
8036 
8037 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8038 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8039 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8040 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8041 				IS_DMA_IDLE(dma_core_sts0);
8042 		is_idle &= is_eng_idle;
8043 
8044 		if (mask && !is_eng_idle)
8045 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8046 		if (e)
8047 			hl_engine_data_sprintf(e, fmt, dma_id,
8048 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8049 				qm_cgm_sts, dma_core_sts0);
8050 	}
8051 
8052 	if (e)
8053 		hl_engine_data_sprintf(e,
8054 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8055 			"---  -------  ------------  ----------  ----------\n");
8056 
8057 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8058 		offset = i * TPC_QMAN_OFFSET;
8059 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8060 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8061 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8062 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8063 				IS_TPC_IDLE(tpc_cfg_sts);
8064 		is_idle &= is_eng_idle;
8065 
8066 		if (mask && !is_eng_idle)
8067 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8068 		if (e)
8069 			hl_engine_data_sprintf(e, fmt, i,
8070 				is_eng_idle ? "Y" : "N",
8071 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8072 	}
8073 
8074 	if (e)
8075 		hl_engine_data_sprintf(e,
8076 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8077 			"---  -------  ------------  ----------  -----------\n");
8078 
8079 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8080 		offset = i * MME_QMAN_OFFSET;
8081 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8082 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8083 
8084 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8085 		is_slave = i % 2;
8086 		if (!is_slave) {
8087 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8088 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8089 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8090 		}
8091 
8092 		is_idle &= is_eng_idle;
8093 
8094 		if (mask && !is_eng_idle)
8095 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8096 		if (e) {
8097 			if (!is_slave)
8098 				hl_engine_data_sprintf(e, fmt, i,
8099 					is_eng_idle ? "Y" : "N",
8100 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8101 			else
8102 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8103 					is_eng_idle ? "Y" : "N", "-",
8104 					"-", mme_arch_sts);
8105 		}
8106 	}
8107 
8108 	if (e)
8109 		hl_engine_data_sprintf(e,
8110 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8111 				"---  -------  ------------  ----------\n");
8112 
8113 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8114 		offset = i * NIC_MACRO_QMAN_OFFSET;
8115 		port = 2 * i;
8116 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8117 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8118 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8119 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8120 			is_idle &= is_eng_idle;
8121 
8122 			if (mask && !is_eng_idle)
8123 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8124 			if (e)
8125 				hl_engine_data_sprintf(e, nic_fmt, port,
8126 						is_eng_idle ? "Y" : "N",
8127 						qm_glbl_sts0, qm_cgm_sts);
8128 		}
8129 
8130 		port = 2 * i + 1;
8131 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8132 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8133 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8134 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8135 			is_idle &= is_eng_idle;
8136 
8137 			if (mask && !is_eng_idle)
8138 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8139 			if (e)
8140 				hl_engine_data_sprintf(e, nic_fmt, port,
8141 						is_eng_idle ? "Y" : "N",
8142 						qm_glbl_sts0, qm_cgm_sts);
8143 		}
8144 	}
8145 
8146 	if (e)
8147 		hl_engine_data_sprintf(e, "\n");
8148 
8149 	return is_idle;
8150 }
8151 
8152 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8153 	__acquires(&gaudi->hw_queues_lock)
8154 {
8155 	struct gaudi_device *gaudi = hdev->asic_specific;
8156 
8157 	spin_lock(&gaudi->hw_queues_lock);
8158 }
8159 
8160 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8161 	__releases(&gaudi->hw_queues_lock)
8162 {
8163 	struct gaudi_device *gaudi = hdev->asic_specific;
8164 
8165 	spin_unlock(&gaudi->hw_queues_lock);
8166 }
8167 
8168 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8169 {
8170 	return hdev->pdev->device;
8171 }
8172 
8173 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8174 				size_t max_size)
8175 {
8176 	struct gaudi_device *gaudi = hdev->asic_specific;
8177 
8178 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8179 		return 0;
8180 
8181 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8182 }
8183 
8184 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8185 {
8186 	struct gaudi_device *gaudi = hdev->asic_specific;
8187 
8188 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8189 		return 0;
8190 
8191 	return hl_fw_get_monitor_dump(hdev, data);
8192 }
8193 
8194 /*
8195  * this function should be used only during initialization and/or after reset,
8196  * when there are no active users.
8197  */
8198 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8199 {
8200 	u64 kernel_timeout;
8201 	u32 status, offset;
8202 	int rc;
8203 
8204 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8205 
8206 	if (hdev->pldm)
8207 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8208 	else
8209 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8210 
8211 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8212 			lower_32_bits(tpc_kernel));
8213 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8214 			upper_32_bits(tpc_kernel));
8215 
8216 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8217 			lower_32_bits(tpc_kernel));
8218 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8219 			upper_32_bits(tpc_kernel));
8220 	/* set a valid LUT pointer, content is of no significance */
8221 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8222 			lower_32_bits(tpc_kernel));
8223 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8224 			upper_32_bits(tpc_kernel));
8225 
8226 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8227 			lower_32_bits(CFG_BASE +
8228 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8229 
8230 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8231 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8232 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8233 	/* wait a bit for the engine to start executing */
8234 	usleep_range(1000, 1500);
8235 
8236 	/* wait until engine has finished executing */
8237 	rc = hl_poll_timeout(
8238 		hdev,
8239 		mmTPC0_CFG_STATUS + offset,
8240 		status,
8241 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8242 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8243 		1000,
8244 		kernel_timeout);
8245 
8246 	if (rc) {
8247 		dev_err(hdev->dev,
8248 			"Timeout while waiting for TPC%d icache prefetch\n",
8249 			tpc_id);
8250 		return -EIO;
8251 	}
8252 
8253 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8254 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8255 
8256 	/* wait a bit for the engine to start executing */
8257 	usleep_range(1000, 1500);
8258 
8259 	/* wait until engine has finished executing */
8260 	rc = hl_poll_timeout(
8261 		hdev,
8262 		mmTPC0_CFG_STATUS + offset,
8263 		status,
8264 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8265 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8266 		1000,
8267 		kernel_timeout);
8268 
8269 	if (rc) {
8270 		dev_err(hdev->dev,
8271 			"Timeout while waiting for TPC%d vector pipe\n",
8272 			tpc_id);
8273 		return -EIO;
8274 	}
8275 
8276 	rc = hl_poll_timeout(
8277 		hdev,
8278 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8279 		status,
8280 		(status == 0),
8281 		1000,
8282 		kernel_timeout);
8283 
8284 	if (rc) {
8285 		dev_err(hdev->dev,
8286 			"Timeout while waiting for TPC%d kernel to execute\n",
8287 			tpc_id);
8288 		return -EIO;
8289 	}
8290 
8291 	return 0;
8292 }
8293 
8294 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8295 		struct hl_ctx *ctx)
8296 {
8297 	struct gaudi_device *gaudi = hdev->asic_specific;
8298 	int min_alloc_order, rc, collective_cb_size;
8299 
8300 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8301 		return 0;
8302 
8303 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8304 							HOST_SPACE_INTERNAL_CB_SZ,
8305 							&hdev->internal_cb_pool_dma_addr,
8306 							GFP_KERNEL | __GFP_ZERO);
8307 
8308 	if (!hdev->internal_cb_pool_virt_addr)
8309 		return -ENOMEM;
8310 
8311 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8312 			sizeof(struct packet_fence);
8313 	min_alloc_order = ilog2(collective_cb_size);
8314 
8315 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8316 	if (!hdev->internal_cb_pool) {
8317 		dev_err(hdev->dev,
8318 			"Failed to create internal CB pool\n");
8319 		rc = -ENOMEM;
8320 		goto free_internal_cb_pool;
8321 	}
8322 
8323 	rc = gen_pool_add(hdev->internal_cb_pool,
8324 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8325 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8326 	if (rc) {
8327 		dev_err(hdev->dev,
8328 			"Failed to add memory to internal CB pool\n");
8329 		rc = -EFAULT;
8330 		goto destroy_internal_cb_pool;
8331 	}
8332 
8333 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8334 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8335 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8336 
8337 	if (!hdev->internal_cb_va_base) {
8338 		rc = -ENOMEM;
8339 		goto destroy_internal_cb_pool;
8340 	}
8341 
8342 	mutex_lock(&hdev->mmu_lock);
8343 
8344 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8345 			hdev->internal_cb_pool_dma_addr,
8346 			HOST_SPACE_INTERNAL_CB_SZ);
8347 	if (rc)
8348 		goto unreserve_internal_cb_pool;
8349 
8350 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8351 	if (rc)
8352 		goto unmap_internal_cb_pool;
8353 
8354 	mutex_unlock(&hdev->mmu_lock);
8355 
8356 	return 0;
8357 
8358 unmap_internal_cb_pool:
8359 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8360 			HOST_SPACE_INTERNAL_CB_SZ);
8361 unreserve_internal_cb_pool:
8362 	mutex_unlock(&hdev->mmu_lock);
8363 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8364 			HOST_SPACE_INTERNAL_CB_SZ);
8365 destroy_internal_cb_pool:
8366 	gen_pool_destroy(hdev->internal_cb_pool);
8367 free_internal_cb_pool:
8368 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8369 					hdev->internal_cb_pool_dma_addr);
8370 
8371 	return rc;
8372 }
8373 
8374 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8375 		struct hl_ctx *ctx)
8376 {
8377 	struct gaudi_device *gaudi = hdev->asic_specific;
8378 
8379 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8380 		return;
8381 
8382 	mutex_lock(&hdev->mmu_lock);
8383 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8384 			HOST_SPACE_INTERNAL_CB_SZ);
8385 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8386 			HOST_SPACE_INTERNAL_CB_SZ);
8387 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8388 	mutex_unlock(&hdev->mmu_lock);
8389 
8390 	gen_pool_destroy(hdev->internal_cb_pool);
8391 
8392 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8393 					hdev->internal_cb_pool_dma_addr);
8394 }
8395 
8396 static int gaudi_ctx_init(struct hl_ctx *ctx)
8397 {
8398 	int rc;
8399 
8400 	if (ctx->asid == HL_KERNEL_ASID_ID)
8401 		return 0;
8402 
8403 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8404 	if (rc)
8405 		return rc;
8406 
8407 	rc = gaudi_restore_user_registers(ctx->hdev);
8408 	if (rc)
8409 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8410 
8411 	return rc;
8412 }
8413 
8414 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8415 {
8416 	if (ctx->asid == HL_KERNEL_ASID_ID)
8417 		return;
8418 
8419 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8420 }
8421 
/* Gaudi performs no work before a CS is scheduled; always returns 0. */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8426 
8427 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8428 {
8429 	return gaudi_cq_assignment[cq_idx];
8430 }
8431 
8432 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8433 {
8434 	return sizeof(struct packet_msg_short) +
8435 			sizeof(struct packet_msg_prot) * 2;
8436 }
8437 
8438 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8439 {
8440 	return sizeof(struct packet_msg_short) * 4 +
8441 			sizeof(struct packet_fence) +
8442 			sizeof(struct packet_msg_prot) * 2;
8443 }
8444 
8445 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8446 {
8447 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8448 }
8449 
8450 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8451 				u32 size, bool eb)
8452 {
8453 	struct hl_cb *cb = (struct hl_cb *) data;
8454 	struct packet_msg_short *pkt;
8455 	u32 value, ctl, pkt_size = sizeof(*pkt);
8456 
8457 	pkt = cb->kernel_address + size;
8458 	memset(pkt, 0, pkt_size);
8459 
8460 	/* Inc by 1, Mode ADD */
8461 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8462 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8463 
8464 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8465 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8466 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8467 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8468 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8469 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8470 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8471 
8472 	pkt->value = cpu_to_le32(value);
8473 	pkt->ctl = cpu_to_le32(ctl);
8474 
8475 	return size + pkt_size;
8476 }
8477 
8478 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8479 					u16 addr)
8480 {
8481 	u32 ctl, pkt_size = sizeof(*pkt);
8482 
8483 	memset(pkt, 0, pkt_size);
8484 
8485 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8486 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8487 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8488 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8489 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8490 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8491 
8492 	pkt->value = cpu_to_le32(value);
8493 	pkt->ctl = cpu_to_le32(ctl);
8494 
8495 	return pkt_size;
8496 }
8497 
8498 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8499 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8500 		u16 sob_val, u16 mon_id)
8501 {
8502 	u64 monitor_base;
8503 	u32 ctl, value, pkt_size = sizeof(*pkt);
8504 	u16 msg_addr_offset;
8505 	u8 mask;
8506 
8507 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8508 		dev_err(hdev->dev,
8509 			"sob_base %u (mask %#x) is not valid\n",
8510 			sob_base, sob_mask);
8511 		return 0;
8512 	}
8513 
8514 	/*
8515 	 * monitor_base should be the content of the base0 address registers,
8516 	 * so it will be added to the msg short offsets
8517 	 */
8518 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8519 
8520 	msg_addr_offset =
8521 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8522 				monitor_base;
8523 
8524 	memset(pkt, 0, pkt_size);
8525 
8526 	/* Monitor config packet: bind the monitor to a sync object */
8527 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8528 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8529 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8530 			0); /* GREATER OR EQUAL*/
8531 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8532 
8533 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8534 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8535 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8536 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8537 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8538 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8539 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8540 
8541 	pkt->value = cpu_to_le32(value);
8542 	pkt->ctl = cpu_to_le32(ctl);
8543 
8544 	return pkt_size;
8545 }
8546 
8547 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8548 {
8549 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8550 
8551 	memset(pkt, 0, pkt_size);
8552 
8553 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8554 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8555 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8556 
8557 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8558 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8559 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8560 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8561 
8562 	pkt->cfg = cpu_to_le32(cfg);
8563 	pkt->ctl = cpu_to_le32(ctl);
8564 
8565 	return pkt_size;
8566 }
8567 
8568 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8569 {
8570 	u32 offset, nic_index;
8571 
8572 	switch (queue_id) {
8573 	case GAUDI_QUEUE_ID_DMA_0_0:
8574 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8575 		break;
8576 	case GAUDI_QUEUE_ID_DMA_0_1:
8577 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8578 		break;
8579 	case GAUDI_QUEUE_ID_DMA_0_2:
8580 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8581 		break;
8582 	case GAUDI_QUEUE_ID_DMA_0_3:
8583 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8584 		break;
8585 	case GAUDI_QUEUE_ID_DMA_1_0:
8586 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8587 		break;
8588 	case GAUDI_QUEUE_ID_DMA_1_1:
8589 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8590 		break;
8591 	case GAUDI_QUEUE_ID_DMA_1_2:
8592 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8593 		break;
8594 	case GAUDI_QUEUE_ID_DMA_1_3:
8595 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8596 		break;
8597 	case GAUDI_QUEUE_ID_DMA_5_0:
8598 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8599 		break;
8600 	case GAUDI_QUEUE_ID_DMA_5_1:
8601 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8602 		break;
8603 	case GAUDI_QUEUE_ID_DMA_5_2:
8604 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8605 		break;
8606 	case GAUDI_QUEUE_ID_DMA_5_3:
8607 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8608 		break;
8609 	case GAUDI_QUEUE_ID_TPC_7_0:
8610 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8611 		break;
8612 	case GAUDI_QUEUE_ID_TPC_7_1:
8613 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8614 		break;
8615 	case GAUDI_QUEUE_ID_TPC_7_2:
8616 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8617 		break;
8618 	case GAUDI_QUEUE_ID_TPC_7_3:
8619 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8620 		break;
8621 	case GAUDI_QUEUE_ID_NIC_0_0:
8622 	case GAUDI_QUEUE_ID_NIC_1_0:
8623 	case GAUDI_QUEUE_ID_NIC_2_0:
8624 	case GAUDI_QUEUE_ID_NIC_3_0:
8625 	case GAUDI_QUEUE_ID_NIC_4_0:
8626 	case GAUDI_QUEUE_ID_NIC_5_0:
8627 	case GAUDI_QUEUE_ID_NIC_6_0:
8628 	case GAUDI_QUEUE_ID_NIC_7_0:
8629 	case GAUDI_QUEUE_ID_NIC_8_0:
8630 	case GAUDI_QUEUE_ID_NIC_9_0:
8631 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8632 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8633 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8634 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8635 		break;
8636 	case GAUDI_QUEUE_ID_NIC_0_1:
8637 	case GAUDI_QUEUE_ID_NIC_1_1:
8638 	case GAUDI_QUEUE_ID_NIC_2_1:
8639 	case GAUDI_QUEUE_ID_NIC_3_1:
8640 	case GAUDI_QUEUE_ID_NIC_4_1:
8641 	case GAUDI_QUEUE_ID_NIC_5_1:
8642 	case GAUDI_QUEUE_ID_NIC_6_1:
8643 	case GAUDI_QUEUE_ID_NIC_7_1:
8644 	case GAUDI_QUEUE_ID_NIC_8_1:
8645 	case GAUDI_QUEUE_ID_NIC_9_1:
8646 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8647 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8648 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8649 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8650 		break;
8651 	case GAUDI_QUEUE_ID_NIC_0_2:
8652 	case GAUDI_QUEUE_ID_NIC_1_2:
8653 	case GAUDI_QUEUE_ID_NIC_2_2:
8654 	case GAUDI_QUEUE_ID_NIC_3_2:
8655 	case GAUDI_QUEUE_ID_NIC_4_2:
8656 	case GAUDI_QUEUE_ID_NIC_5_2:
8657 	case GAUDI_QUEUE_ID_NIC_6_2:
8658 	case GAUDI_QUEUE_ID_NIC_7_2:
8659 	case GAUDI_QUEUE_ID_NIC_8_2:
8660 	case GAUDI_QUEUE_ID_NIC_9_2:
8661 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8662 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8663 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8664 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8665 		break;
8666 	case GAUDI_QUEUE_ID_NIC_0_3:
8667 	case GAUDI_QUEUE_ID_NIC_1_3:
8668 	case GAUDI_QUEUE_ID_NIC_2_3:
8669 	case GAUDI_QUEUE_ID_NIC_3_3:
8670 	case GAUDI_QUEUE_ID_NIC_4_3:
8671 	case GAUDI_QUEUE_ID_NIC_5_3:
8672 	case GAUDI_QUEUE_ID_NIC_6_3:
8673 	case GAUDI_QUEUE_ID_NIC_7_3:
8674 	case GAUDI_QUEUE_ID_NIC_8_3:
8675 	case GAUDI_QUEUE_ID_NIC_9_3:
8676 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8677 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8678 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8679 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8680 		break;
8681 	default:
8682 		return -EINVAL;
8683 	}
8684 
8685 	*addr = CFG_BASE + offset;
8686 
8687 	return 0;
8688 }
8689 
8690 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8691 {
8692 	u64 monitor_base;
8693 	u32 size = 0;
8694 	u16 msg_addr_offset;
8695 
8696 	/*
8697 	 * monitor_base should be the content of the base0 address registers,
8698 	 * so it will be added to the msg short offsets
8699 	 */
8700 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8701 
8702 	/* First monitor config packet: low address of the sync */
8703 	msg_addr_offset =
8704 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8705 				monitor_base;
8706 
8707 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8708 					msg_addr_offset);
8709 
8710 	/* Second monitor config packet: high address of the sync */
8711 	msg_addr_offset =
8712 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8713 				monitor_base;
8714 
8715 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8716 					msg_addr_offset);
8717 
8718 	/*
8719 	 * Third monitor config packet: the payload, i.e. what to write when the
8720 	 * sync triggers
8721 	 */
8722 	msg_addr_offset =
8723 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8724 				monitor_base;
8725 
8726 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8727 
8728 	return size;
8729 }
8730 
8731 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8732 				struct hl_gen_wait_properties *prop)
8733 {
8734 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8735 	void *buf = cb->kernel_address;
8736 	u64 fence_addr = 0;
8737 	u32 size = prop->size;
8738 
8739 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8740 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8741 				prop->q_idx);
8742 		return 0;
8743 	}
8744 
8745 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8746 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8747 			prop->sob_mask, prop->sob_val, prop->mon_id);
8748 	size += gaudi_add_fence_pkt(buf + size);
8749 
8750 	return size;
8751 }
8752 
8753 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8754 {
8755 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8756 
8757 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8758 		hw_sob->sob_id);
8759 
8760 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8761 			hw_sob->sob_id * 4, 0);
8762 
8763 	kref_init(&hw_sob->kref);
8764 }
8765 
8766 static u64 gaudi_get_device_time(struct hl_device *hdev)
8767 {
8768 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8769 
8770 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8771 }
8772 
8773 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8774 				u32 *block_size, u32 *block_id)
8775 {
8776 	return -EPERM;
8777 }
8778 
8779 static int gaudi_block_mmap(struct hl_device *hdev,
8780 				struct vm_area_struct *vma,
8781 				u32 block_id, u32 block_size)
8782 {
8783 	return -EPERM;
8784 }
8785 
8786 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8787 {
8788 	struct cpu_dyn_regs *dyn_regs =
8789 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8790 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8791 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8792 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8793 
8794 	WREG32(irq_handler_offset,
8795 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8796 }
8797 
8798 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8799 {
8800 	return -EINVAL;
8801 }
8802 
8803 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8804 {
8805 	switch (pll_idx) {
8806 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8807 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8808 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8809 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8810 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8811 	case HL_GAUDI_MME_PLL: return MME_PLL;
8812 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8813 	case HL_GAUDI_IF_PLL: return IF_PLL;
8814 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8815 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8816 	default: return -EINVAL;
8817 	}
8818 }
8819 
8820 static int gaudi_add_sync_to_engine_map_entry(
8821 	struct hl_sync_to_engine_map *map, u32 reg_value,
8822 	enum hl_sync_engine_type engine_type, u32 engine_id)
8823 {
8824 	struct hl_sync_to_engine_map_entry *entry;
8825 
8826 	/* Reg value represents a partial address of sync object,
8827 	 * it is used as unique identifier. For this we need to
8828 	 * clear the cutoff cfg base bits from the value.
8829 	 */
8830 	if (reg_value == 0 || reg_value == 0xffffffff)
8831 		return 0;
8832 	reg_value -= lower_32_bits(CFG_BASE);
8833 
8834 	/* create a new hash entry */
8835 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8836 	if (!entry)
8837 		return -ENOMEM;
8838 	entry->engine_type = engine_type;
8839 	entry->engine_id = engine_id;
8840 	entry->sync_id = reg_value;
8841 	hash_add(map->tb, &entry->node, reg_value);
8842 
8843 	return 0;
8844 }
8845 
8846 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8847 				struct hl_sync_to_engine_map *map)
8848 {
8849 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8850 	int i, j, rc;
8851 	u32 reg_value;
8852 
8853 	/* Iterate over TPC engines */
8854 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8855 
8856 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8857 					sds->props[SP_NEXT_TPC] * i);
8858 
8859 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8860 							ENGINE_TPC, i);
8861 		if (rc)
8862 			goto free_sync_to_engine_map;
8863 	}
8864 
8865 	/* Iterate over MME engines */
8866 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8867 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8868 
8869 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8870 						sds->props[SP_NEXT_MME] * i +
8871 						j * sizeof(u32));
8872 
8873 			rc = gaudi_add_sync_to_engine_map_entry(
8874 				map, reg_value, ENGINE_MME,
8875 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8876 			if (rc)
8877 				goto free_sync_to_engine_map;
8878 		}
8879 	}
8880 
8881 	/* Iterate over DMA engines */
8882 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8883 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8884 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8885 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8886 							ENGINE_DMA, i);
8887 		if (rc)
8888 			goto free_sync_to_engine_map;
8889 	}
8890 
8891 	return 0;
8892 
8893 free_sync_to_engine_map:
8894 	hl_state_dump_free_sync_to_engine_map(map);
8895 
8896 	return rc;
8897 }
8898 
8899 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8900 {
8901 	return FIELD_GET(
8902 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8903 		mon->status);
8904 }
8905 
8906 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8907 {
8908 	const size_t max_write = 10;
8909 	u32 gid, mask, sob;
8910 	int i, offset;
8911 
8912 	/* Sync object ID is calculated as follows:
8913 	 * (8 * group_id + cleared bits in mask)
8914 	 */
8915 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8916 			mon->arm_data);
8917 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8918 			mon->arm_data);
8919 
8920 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8921 		max_write; mask >>= 1, i++) {
8922 		if (!(mask & 1)) {
8923 			sob = gid * MONITOR_MAX_SOBS + i;
8924 
8925 			if (offset > 0)
8926 				offset += snprintf(sobs + offset, max_write,
8927 							", ");
8928 
8929 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8930 		}
8931 	}
8932 }
8933 
8934 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8935 				struct hl_device *hdev,
8936 				struct hl_mon_state_dump *mon)
8937 {
8938 	const char *name;
8939 	char scratch_buf1[BIN_REG_STRING_SIZE],
8940 		scratch_buf2[BIN_REG_STRING_SIZE];
8941 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8942 
8943 	name = hl_state_dump_get_monitor_name(hdev, mon);
8944 	if (!name)
8945 		name = "";
8946 
8947 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8948 
8949 	return hl_snprintf_resize(
8950 		buf, size, offset,
8951 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8952 		mon->id, name,
8953 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8954 				mon->arm_data),
8955 		hl_format_as_binary(
8956 			scratch_buf1, sizeof(scratch_buf1),
8957 			FIELD_GET(
8958 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8959 				mon->arm_data)),
8960 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8961 				mon->arm_data),
8962 		mon->wr_data,
8963 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8964 		hl_format_as_binary(
8965 			scratch_buf2, sizeof(scratch_buf2),
8966 			FIELD_GET(
8967 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8968 				mon->status)),
8969 		monitored_sobs);
8970 }
8971 
8972 
8973 static int gaudi_print_fences_single_engine(
8974 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8975 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8976 	size_t *size, size_t *offset)
8977 {
8978 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8979 	int rc = -ENOMEM, i;
8980 	u32 *statuses, *fences;
8981 
8982 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8983 			sizeof(*statuses), GFP_KERNEL);
8984 	if (!statuses)
8985 		goto out;
8986 
8987 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8988 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
8989 			 sizeof(*fences), GFP_KERNEL);
8990 	if (!fences)
8991 		goto free_status;
8992 
8993 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8994 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8995 
8996 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8997 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8998 		fences[i] = RREG32(base_offset + i * sizeof(u32));
8999 
9000 	/* The actual print */
9001 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9002 		u32 fence_id;
9003 		u64 fence_cnt, fence_rdata;
9004 		const char *engine_name;
9005 
9006 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9007 			statuses[i]))
9008 			continue;
9009 
9010 		fence_id =
9011 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9012 		fence_cnt = base_offset + CFG_BASE +
9013 			sizeof(u32) *
9014 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9015 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9016 				sds->props[SP_FENCE0_RDATA_OFFSET];
9017 		engine_name = hl_sync_engine_to_string(engine_type);
9018 
9019 		rc = hl_snprintf_resize(
9020 			buf, size, offset,
9021 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9022 			engine_name, engine_id,
9023 			i, fence_id,
9024 			fence_cnt, engine_name, engine_id, fence_id, i,
9025 			fence_rdata, engine_name, engine_id, fence_id, i,
9026 			fences[fence_id],
9027 			statuses[i]);
9028 		if (rc)
9029 			goto free_fences;
9030 	}
9031 
9032 	rc = 0;
9033 
9034 free_fences:
9035 	kfree(fences);
9036 free_status:
9037 	kfree(statuses);
9038 out:
9039 	return rc;
9040 }
9041 
9042 
9043 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9044 	.monitor_valid = gaudi_monitor_valid,
9045 	.print_single_monitor = gaudi_print_single_monitor,
9046 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9047 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9048 };
9049 
9050 static void gaudi_state_dump_init(struct hl_device *hdev)
9051 {
9052 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9053 	int i;
9054 
9055 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9056 		hash_add(sds->so_id_to_str_tb,
9057 			&gaudi_so_id_to_str[i].node,
9058 			gaudi_so_id_to_str[i].id);
9059 
9060 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9061 		hash_add(sds->monitor_id_to_str_tb,
9062 			&gaudi_monitor_id_to_str[i].node,
9063 			gaudi_monitor_id_to_str[i].id);
9064 
9065 	sds->props = gaudi_state_dump_specs_props;
9066 
9067 	sds->sync_namager_names = gaudi_sync_manager_names;
9068 
9069 	sds->funcs = gaudi_state_dump_funcs;
9070 }
9071 
9072 static u32 *gaudi_get_stream_master_qid_arr(void)
9073 {
9074 	return gaudi_stream_master;
9075 }
9076 
/* Gaudi has no DRAM properties to set here; always returns 0. */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9081 
/* Gaudi has no binning masks to set here; always returns 0. */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9086 
/* Intentionally empty: this hook does nothing on Gaudi. */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9090 
9091 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9092 {
9093 	struct hl_device *hdev = dev_get_drvdata(dev);
9094 	struct cpucp_info *cpucp_info;
9095 
9096 	cpucp_info = &hdev->asic_prop.cpucp_info;
9097 
9098 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9099 }
9100 
9101 static DEVICE_ATTR_RO(infineon_ver);
9102 
9103 static struct attribute *gaudi_vrm_dev_attrs[] = {
9104 	&dev_attr_infineon_ver.attr,
9105 	NULL,
9106 };
9107 
9108 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9109 					struct attribute_group *dev_vrm_attr_grp)
9110 {
9111 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9112 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9113 }
9114 
/**
 * gaudi_send_device_activity - send_device_activity callback of GAUDI
 *
 * @hdev: pointer to hl_device structure (unused)
 * @open: unused
 *
 * Nothing is done for GAUDI in this callback.
 *
 * Return: always 0.
 */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9119 
9120 static const struct hl_asic_funcs gaudi_funcs = {
9121 	.early_init = gaudi_early_init,
9122 	.early_fini = gaudi_early_fini,
9123 	.late_init = gaudi_late_init,
9124 	.late_fini = gaudi_late_fini,
9125 	.sw_init = gaudi_sw_init,
9126 	.sw_fini = gaudi_sw_fini,
9127 	.hw_init = gaudi_hw_init,
9128 	.hw_fini = gaudi_hw_fini,
9129 	.halt_engines = gaudi_halt_engines,
9130 	.suspend = gaudi_suspend,
9131 	.resume = gaudi_resume,
9132 	.mmap = gaudi_mmap,
9133 	.ring_doorbell = gaudi_ring_doorbell,
9134 	.pqe_write = gaudi_pqe_write,
9135 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9136 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9137 	.scrub_device_mem = gaudi_scrub_device_mem,
9138 	.scrub_device_dram = gaudi_scrub_device_dram,
9139 	.get_int_queue_base = gaudi_get_int_queue_base,
9140 	.test_queues = gaudi_test_queues,
9141 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9142 	.asic_dma_pool_free = gaudi_dma_pool_free,
9143 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9144 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9145 	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9146 	.cs_parser = gaudi_cs_parser,
9147 	.dma_map_sgtable = hl_asic_dma_map_sgtable,
9148 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9149 	.update_eq_ci = gaudi_update_eq_ci,
9150 	.context_switch = gaudi_context_switch,
9151 	.restore_phase_topology = gaudi_restore_phase_topology,
9152 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9153 	.add_device_attr = gaudi_add_device_attr,
9154 	.handle_eqe = gaudi_handle_eqe,
9155 	.get_events_stat = gaudi_get_events_stat,
9156 	.read_pte = gaudi_read_pte,
9157 	.write_pte = gaudi_write_pte,
9158 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9159 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9160 	.mmu_prefetch_cache_range = NULL,
9161 	.send_heartbeat = gaudi_send_heartbeat,
9162 	.debug_coresight = gaudi_debug_coresight,
9163 	.is_device_idle = gaudi_is_device_idle,
9164 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9165 	.hw_queues_lock = gaudi_hw_queues_lock,
9166 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9167 	.get_pci_id = gaudi_get_pci_id,
9168 	.get_eeprom_data = gaudi_get_eeprom_data,
9169 	.get_monitor_dump = gaudi_get_monitor_dump,
9170 	.send_cpu_message = gaudi_send_cpu_message,
9171 	.pci_bars_map = gaudi_pci_bars_map,
9172 	.init_iatu = gaudi_init_iatu,
9173 	.rreg = hl_rreg,
9174 	.wreg = hl_wreg,
9175 	.halt_coresight = gaudi_halt_coresight,
9176 	.ctx_init = gaudi_ctx_init,
9177 	.ctx_fini = gaudi_ctx_fini,
9178 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9179 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9180 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9181 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9182 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9183 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9184 	.gen_signal_cb = gaudi_gen_signal_cb,
9185 	.gen_wait_cb = gaudi_gen_wait_cb,
9186 	.reset_sob = gaudi_reset_sob,
9187 	.reset_sob_group = gaudi_reset_sob_group,
9188 	.get_device_time = gaudi_get_device_time,
9189 	.pb_print_security_errors = NULL,
9190 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9191 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9192 	.get_dec_base_addr = NULL,
9193 	.scramble_addr = hl_mmu_scramble_addr,
9194 	.descramble_addr = hl_mmu_descramble_addr,
9195 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9196 	.get_hw_block_id = gaudi_get_hw_block_id,
9197 	.hw_block_mmap = gaudi_block_mmap,
9198 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9199 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9200 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9201 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9202 	.init_firmware_loader = gaudi_init_firmware_loader,
9203 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9204 	.state_dump_init = gaudi_state_dump_init,
9205 	.get_sob_addr = gaudi_get_sob_addr,
9206 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9207 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9208 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9209 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9210 	.access_dev_mem = hl_access_dev_mem,
9211 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9212 	.send_device_activity = gaudi_send_device_activity,
9213 	.set_dram_properties = gaudi_set_dram_properties,
9214 	.set_binning_masks = gaudi_set_binning_masks,
9215 };
9216 
9217 /**
9218  * gaudi_set_asic_funcs - set GAUDI function pointers
9219  *
9220  * @hdev: pointer to hl_device structure
9221  *
9222  */
9223 void gaudi_set_asic_funcs(struct hl_device *hdev)
9224 {
9225 	hdev->asic_funcs = &gaudi_funcs;
9226 }
9227