1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse the CB, but WREG should be
43  *                      allowed because of TDMA (tensor DMA). Hence, WREG is
44  *                      never secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69 
70 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
71 
72 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
76 
77 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
86 
87 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
88 
89 #define GAUDI_MAX_STRING_LEN		20
90 
91 #define GAUDI_CB_POOL_CB_CNT		512
92 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
93 
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
95 
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
97 
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
99 
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
101 
102 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
103 
104 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
105 
106 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
107 
108 #define MONITOR_SOB_STRING_SIZE		256
109 
110 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
111 	GAUDI_QUEUE_ID_DMA_0_0,
112 	GAUDI_QUEUE_ID_DMA_0_1,
113 	GAUDI_QUEUE_ID_DMA_0_2,
114 	GAUDI_QUEUE_ID_DMA_0_3,
115 	GAUDI_QUEUE_ID_DMA_1_0,
116 	GAUDI_QUEUE_ID_DMA_1_1,
117 	GAUDI_QUEUE_ID_DMA_1_2,
118 	GAUDI_QUEUE_ID_DMA_1_3
119 };
120 
121 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
122 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
123 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
124 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
125 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
126 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
127 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
128 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
129 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
130 };
131 
132 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
133 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
134 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
135 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
136 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
137 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
138 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
139 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
140 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
141 };
142 
143 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
144 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
145 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
146 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
147 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
148 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
149 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
150 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
151 	[PACKET_FENCE]		= sizeof(struct packet_fence),
152 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
153 	[PACKET_NOP]		= sizeof(struct packet_nop),
154 	[PACKET_STOP]		= sizeof(struct packet_stop),
155 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
156 	[PACKET_WAIT]		= sizeof(struct packet_wait),
157 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
158 };
159 
160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 	switch (id) {
163 	case PACKET_WREG_32:
164 	case PACKET_WREG_BULK:
165 	case PACKET_MSG_LONG:
166 	case PACKET_MSG_SHORT:
167 	case PACKET_CP_DMA:
168 	case PACKET_REPEAT:
169 	case PACKET_MSG_PROT:
170 	case PACKET_FENCE:
171 	case PACKET_LIN_DMA:
172 	case PACKET_NOP:
173 	case PACKET_STOP:
174 	case PACKET_ARB_POINT:
175 	case PACKET_WAIT:
176 	case PACKET_LOAD_AND_EXE:
177 		return true;
178 	default:
179 		return false;
180 	}
181 }
182 
183 static const char * const
184 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
185 	"tpc_address_exceed_slm",
186 	"tpc_div_by_0",
187 	"tpc_spu_mac_overflow",
188 	"tpc_spu_addsub_overflow",
189 	"tpc_spu_abs_overflow",
190 	"tpc_spu_fp_dst_nan_inf",
191 	"tpc_spu_fp_dst_denorm",
192 	"tpc_vpu_mac_overflow",
193 	"tpc_vpu_addsub_overflow",
194 	"tpc_vpu_abs_overflow",
195 	"tpc_vpu_fp_dst_nan_inf",
196 	"tpc_vpu_fp_dst_denorm",
197 	"tpc_assertions",
198 	"tpc_illegal_instruction",
199 	"tpc_pc_wrap_around",
200 	"tpc_qm_sw_err",
201 	"tpc_hbw_rresp_err",
202 	"tpc_hbw_bresp_err",
203 	"tpc_lbw_rresp_err",
204 	"tpc_lbw_bresp_err"
205 };
206 
207 static const char * const
208 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209 	"PQ AXI HBW error",
210 	"CQ AXI HBW error",
211 	"CP AXI HBW error",
212 	"CP error due to undefined OPCODE",
213 	"CP encountered STOP OPCODE",
214 	"CP AXI LBW error",
215 	"CP WRREG32 or WRBULK returned error",
216 	"N/A",
217 	"FENCE 0 inc over max value and clipped",
218 	"FENCE 1 inc over max value and clipped",
219 	"FENCE 2 inc over max value and clipped",
220 	"FENCE 3 inc over max value and clipped",
221 	"FENCE 0 dec under min value and clipped",
222 	"FENCE 1 dec under min value and clipped",
223 	"FENCE 2 dec under min value and clipped",
224 	"FENCE 3 dec under min value and clipped"
225 };
226 
227 static const char * const
228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
229 	"Choice push while full error",
230 	"Choice Q watchdog error",
231 	"MSG AXI LBW returned with error"
232 };
233 
234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
235 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
236 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
237 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
243 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
244 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
245 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
246 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
348 };
349 
350 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
351 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
353 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
357 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
360 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
361 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
377 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
378 };
379 
380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
381 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
385 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
386 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
387 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
388 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
389 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
390 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
391 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
392 };
393 
394 static s64 gaudi_state_dump_specs_props[] = {
395 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
396 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
397 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
398 	[SP_MON_OBJ_WR_ADDR_LOW] =
399 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
400 	[SP_MON_OBJ_WR_ADDR_HIGH] =
401 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
402 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
403 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
404 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
405 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
406 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
407 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
408 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
409 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
410 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
411 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
412 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
413 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
414 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
415 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
416 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
417 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
418 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
419 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
420 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
421 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
422 	[SP_FENCE0_CNT_OFFSET] =
423 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
424 	[SP_FENCE0_RDATA_OFFSET] =
425 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
426 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_NUM_CORES] = 1,
428 };
429 
430 static const int gaudi_queue_id_to_engine_id[] = {
431 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
432 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
433 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
434 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
435 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
436 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
437 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
438 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
439 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
440 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
441 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
442 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
443 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
444 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
445 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
446 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
447 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
448 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
449 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
450 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
451 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
452 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
453 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
454 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
455 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
456 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
457 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
458 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
459 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
460 };
461 
462 /* The order here is opposite to the order of the indexing in the h/w.
463  * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
464  */
465 static const char * const gaudi_sync_manager_names[] = {
466 	"SYNC_MGR_E_N",
467 	"SYNC_MGR_W_N",
468 	"SYNC_MGR_E_S",
469 	"SYNC_MGR_W_S",
470 	NULL
471 };
472 
473 struct ecc_info_extract_params {
474 	u64 block_address;
475 	u32 num_memories;
476 	bool derr;
477 };
478 
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480 								u64 phys_addr);
481 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482 					struct hl_cs_job *job);
483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484 					u32 size, u64 val);
485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486 					u32 num_regs, u32 val);
487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488 				u32 tpc_id);
489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490 static int gaudi_cpucp_info_get(struct hl_device *hdev);
491 static void gaudi_disable_clock_gating(struct hl_device *hdev);
492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494 				u32 size, bool eb);
495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496 				struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 		return HL_COLLECTIVE_MASTER;
502 
503 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 		return HL_COLLECTIVE_SLAVE;
506 
507 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 		return HL_COLLECTIVE_SLAVE;
510 
511 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 		return HL_COLLECTIVE_SLAVE;
514 
515 	return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517 
518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 	struct asic_fixed_properties *prop = &hdev->asic_prop;
521 
522 	if (hdev->card_type == cpucp_card_type_pmc) {
523 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524 
525 		if (prop->fw_security_enabled)
526 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 		else
528 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 	} else {
530 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 	}
533 }
534 
535 static int gaudi_set_fixed_properties(struct hl_device *hdev)
536 {
537 	struct asic_fixed_properties *prop = &hdev->asic_prop;
538 	u32 num_sync_stream_queues = 0;
539 	int i;
540 
541 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542 	prop->hw_queues_props = kcalloc(prop->max_queues,
543 			sizeof(struct hw_queue_properties),
544 			GFP_KERNEL);
545 
546 	if (!prop->hw_queues_props)
547 		return -ENOMEM;
548 
549 	for (i = 0 ; i < prop->max_queues ; i++) {
550 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552 			prop->hw_queues_props[i].driver_only = 0;
553 			prop->hw_queues_props[i].supports_sync_stream = 1;
554 			prop->hw_queues_props[i].cb_alloc_flags =
555 				CB_ALLOC_KERNEL;
556 			num_sync_stream_queues++;
557 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559 			prop->hw_queues_props[i].driver_only = 1;
560 			prop->hw_queues_props[i].supports_sync_stream = 0;
561 			prop->hw_queues_props[i].cb_alloc_flags =
562 				CB_ALLOC_KERNEL;
563 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565 			prop->hw_queues_props[i].driver_only = 0;
566 			prop->hw_queues_props[i].supports_sync_stream = 0;
567 			prop->hw_queues_props[i].cb_alloc_flags =
568 				CB_ALLOC_USER;
569 
570 		}
571 		prop->hw_queues_props[i].collective_mode =
572 						get_collective_mode(hdev, i);
573 	}
574 
575 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576 	prop->cfg_base_address = CFG_BASE;
577 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578 	prop->host_base_address = HOST_PHYS_BASE;
579 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
582 	prop->collective_first_sob = 0;
583 	prop->collective_first_mon = 0;
584 
585 	/* 2 SOBs per internal queue stream are reserved for collective */
586 	prop->sync_stream_first_sob =
587 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588 			* QMAN_STREAMS * HL_RSVD_SOBS;
589 
590 	/* 1 monitor per internal queue stream is reserved for collective
591 	 * 2 monitors per external queue stream are reserved for collective
592 	 */
593 	prop->sync_stream_first_mon =
594 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595 			(NUMBER_OF_EXT_HW_QUEUES * 2);
596 
597 	prop->dram_base_address = DRAM_PHYS_BASE;
598 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
599 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601 
602 	prop->sram_base_address = SRAM_BASE_ADDR;
603 	prop->sram_size = SRAM_SIZE;
604 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605 	prop->sram_user_base_address =
606 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607 
608 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610 
611 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612 	if (hdev->pldm)
613 		prop->mmu_pgt_size = 0x800000; /* 8MB */
614 	else
615 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616 	prop->mmu_pte_size = HL_PTE_SIZE;
617 	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
618 	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
619 	prop->dram_page_size = PAGE_SIZE_2MB;
620 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
621 	prop->dram_supports_virtual_memory = false;
622 
623 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
624 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
625 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
626 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
627 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
628 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
629 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
630 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
631 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
632 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
633 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
634 	prop->pmmu.end_addr =
635 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
636 	prop->pmmu.page_size = PAGE_SIZE_4KB;
637 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
638 	prop->pmmu.last_mask = LAST_MASK;
639 	/* TODO: will be duplicated until implementing per-MMU props */
640 	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
641 	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;
642 
643 	/* PMMU and HPMMU are the same except for the page size */
644 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
645 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
646 
647 	/* shifts and masks are the same in PMMU and DMMU */
648 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
649 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
650 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
651 	prop->dmmu.page_size = PAGE_SIZE_2MB;
652 
653 	prop->cfg_size = CFG_SIZE;
654 	prop->max_asid = MAX_ASID;
655 	prop->num_of_events = GAUDI_EVENT_SIZE;
656 	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
657 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
658 
659 	set_default_power_values(hdev);
660 
661 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
662 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
663 
664 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
665 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
666 
667 	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
668 					CARD_NAME_MAX_LEN);
669 
670 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
671 
672 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
673 			prop->sync_stream_first_sob +
674 			(num_sync_stream_queues * HL_RSVD_SOBS);
675 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
676 			prop->sync_stream_first_mon +
677 			(num_sync_stream_queues * HL_RSVD_MONS);
678 
679 	prop->first_available_user_interrupt = USHRT_MAX;
680 	prop->tpc_interrupt_id = USHRT_MAX;
681 
682 	/* single msi */
683 	prop->eq_interrupt_id = 0;
684 
685 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
686 		prop->first_available_cq[i] = USHRT_MAX;
687 
688 	prop->fw_cpu_boot_dev_sts0_valid = false;
689 	prop->fw_cpu_boot_dev_sts1_valid = false;
690 	prop->hard_reset_done_by_fw = false;
691 	prop->gic_interrupts_enable = true;
692 
693 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
694 
695 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
696 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
697 
698 	prop->use_get_power_for_reset_history = true;
699 
700 	prop->configurable_stop_on_err = true;
701 
702 	prop->set_max_power_on_device_init = true;
703 
704 	prop->dma_mask = 48;
705 
706 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
707 
708 	return 0;
709 }
710 
711 static int gaudi_pci_bars_map(struct hl_device *hdev)
712 {
713 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
714 	bool is_wc[3] = {false, false, true};
715 	int rc;
716 
717 	rc = hl_pci_bars_map(hdev, name, is_wc);
718 	if (rc)
719 		return rc;
720 
721 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
722 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
723 
724 	return 0;
725 }
726 
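/*
 * Re-point the HBM BAR (inbound PCI region 2) to the given device address and
 * return the previous BAR base, or U64_MAX if the iATU cannot be reconfigured
 * by the driver or if the programming fails.
 */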
727 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
728 {
729 	struct gaudi_device *gaudi = hdev->asic_specific;
730 	struct hl_inbound_pci_region pci_region;
731 	u64 old_addr = addr;
732 	int rc;
733 
734 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
735 		return old_addr;
736 
737 	if (hdev->asic_prop.iatu_done_by_fw)
738 		return U64_MAX;
739 
740 	/* Inbound Region 2 - Bar 4 - Point to HBM */
741 	pci_region.mode = PCI_BAR_MATCH_MODE;
742 	pci_region.bar = HBM_BAR_ID;
743 	pci_region.addr = addr;
744 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
745 	if (rc)
746 		return U64_MAX;
747 
748 	if (gaudi) {
749 		old_addr = gaudi->hbm_bar_cur_addr;
750 		gaudi->hbm_bar_cur_addr = addr;
751 	}
752 
753 	return old_addr;
754 }
755 
756 static int gaudi_init_iatu(struct hl_device *hdev)
757 {
758 	struct hl_inbound_pci_region inbound_region;
759 	struct hl_outbound_pci_region outbound_region;
760 	int rc;
761 
762 	if (hdev->asic_prop.iatu_done_by_fw)
763 		return 0;
764 
765 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
766 	inbound_region.mode = PCI_BAR_MATCH_MODE;
767 	inbound_region.bar = SRAM_BAR_ID;
768 	inbound_region.addr = SRAM_BASE_ADDR;
769 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
770 	if (rc)
771 		goto done;
772 
773 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
774 	inbound_region.mode = PCI_BAR_MATCH_MODE;
775 	inbound_region.bar = CFG_BAR_ID;
776 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
777 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
778 	if (rc)
779 		goto done;
780 
781 	/* Inbound Region 2 - Bar 4 - Point to HBM */
782 	inbound_region.mode = PCI_BAR_MATCH_MODE;
783 	inbound_region.bar = HBM_BAR_ID;
784 	inbound_region.addr = DRAM_PHYS_BASE;
785 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
786 	if (rc)
787 		goto done;
788 
789 	/* Outbound Region 0 - Point to Host */
790 	outbound_region.addr = HOST_PHYS_BASE;
791 	outbound_region.size = HOST_PHYS_SIZE;
792 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
793 
794 done:
795 	return rc;
796 }
797 
798 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
799 {
800 	return RREG32(mmHW_STATE);
801 }
802 
803 static int gaudi_early_init(struct hl_device *hdev)
804 {
805 	struct asic_fixed_properties *prop = &hdev->asic_prop;
806 	struct pci_dev *pdev = hdev->pdev;
807 	resource_size_t pci_bar_size;
808 	u32 fw_boot_status;
809 	int rc;
810 
811 	rc = gaudi_set_fixed_properties(hdev);
812 	if (rc) {
813 		dev_err(hdev->dev, "Failed setting fixed properties\n");
814 		return rc;
815 	}
816 
817 	/* Check BAR sizes */
818 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
819 
820 	if (pci_bar_size != SRAM_BAR_SIZE) {
821 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
822 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
823 		rc = -ENODEV;
824 		goto free_queue_props;
825 	}
826 
827 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
828 
829 	if (pci_bar_size != CFG_BAR_SIZE) {
830 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
831 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
832 		rc = -ENODEV;
833 		goto free_queue_props;
834 	}
835 
836 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
837 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
838 
839 	/* If FW security is enabled at this point it means no access to ELBI */
840 	if (hdev->asic_prop.fw_security_enabled) {
841 		hdev->asic_prop.iatu_done_by_fw = true;
842 
843 		/*
844 		 * The GIC-security-bit can ONLY be set by CPUCP, so at this stage
845 		 * the decision can only be taken based on PCI ID security.
846 		 */
847 		hdev->asic_prop.gic_interrupts_enable = false;
848 		goto pci_init;
849 	}
850 
851 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
852 				&fw_boot_status);
853 	if (rc)
854 		goto free_queue_props;
855 
856 	/* Check whether FW is configuring iATU */
857 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
858 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
859 		hdev->asic_prop.iatu_done_by_fw = true;
860 
861 pci_init:
862 	rc = hl_pci_init(hdev);
863 	if (rc)
864 		goto free_queue_props;
865 
866 	/* Before continuing with the initialization, we need to read the preboot
867 	 * version to determine whether we are running with security-enabled firmware
868 	 */
869 	rc = hl_fw_read_preboot_status(hdev);
870 	if (rc) {
871 		if (hdev->reset_on_preboot_fail)
872 			/* we are already on failure flow, so don't check if hw_fini fails. */
873 			hdev->asic_funcs->hw_fini(hdev, true, false);
874 		goto pci_fini;
875 	}
876 
877 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
878 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
879 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
880 		if (rc) {
881 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
882 			goto pci_fini;
883 		}
884 	}
885 
886 	return 0;
887 
888 pci_fini:
889 	hl_pci_fini(hdev);
890 free_queue_props:
891 	kfree(hdev->asic_prop.hw_queues_props);
892 	return rc;
893 }
894 
895 static int gaudi_early_fini(struct hl_device *hdev)
896 {
897 	kfree(hdev->asic_prop.hw_queues_props);
898 	hl_pci_fini(hdev);
899 
900 	return 0;
901 }
902 
903 /**
904  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
905  *
906  * @hdev: pointer to hl_device structure
907  * Return: 0 on success, negative error code on failure.
908  */
909 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
910 {
911 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
912 	struct asic_fixed_properties *prop = &hdev->asic_prop;
913 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
914 	int rc;
915 
916 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
917 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
918 		struct gaudi_device *gaudi = hdev->asic_specific;
919 
920 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
921 			return 0;
922 
923 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
924 
925 		if (rc)
926 			return rc;
927 
928 		freq = pll_freq_arr[2];
929 	} else {
930 		/* Backward compatibility */
931 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
932 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
933 		nr = RREG32(mmPSOC_CPU_PLL_NR);
934 		nf = RREG32(mmPSOC_CPU_PLL_NF);
935 		od = RREG32(mmPSOC_CPU_PLL_OD);
936 
937 		if (div_sel == DIV_SEL_REF_CLK ||
938 				div_sel == DIV_SEL_DIVIDED_REF) {
939 			if (div_sel == DIV_SEL_REF_CLK)
940 				freq = PLL_REF_CLK;
941 			else
942 				freq = PLL_REF_CLK / (div_fctr + 1);
943 		} else if (div_sel == DIV_SEL_PLL_CLK ||
944 			div_sel == DIV_SEL_DIVIDED_PLL) {
945 			pll_clk = PLL_REF_CLK * (nf + 1) /
946 					((nr + 1) * (od + 1));
947 			if (div_sel == DIV_SEL_PLL_CLK)
948 				freq = pll_clk;
949 			else
950 				freq = pll_clk / (div_fctr + 1);
951 		} else {
952 			dev_warn(hdev->dev, "Received invalid div select value: %#x\n", div_sel);
953 			freq = 0;
954 		}
955 	}
956 
957 	prop->psoc_timestamp_frequency = freq;
958 	prop->psoc_pci_pll_nr = nr;
959 	prop->psoc_pci_pll_nf = nf;
960 	prop->psoc_pci_pll_od = od;
961 	prop->psoc_pci_pll_div_factor = div_fctr;
962 
963 	return 0;
964 }
965 
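/*
 * Copy the TPC kernel binary from host DMA memory to SRAM using a LIN_DMA
 * packet submitted on QMAN0, then execute it on each TPC engine to initialize
 * the TPC memories.
 */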
966 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
967 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
968 {
969 	struct asic_fixed_properties *prop = &hdev->asic_prop;
970 	struct packet_lin_dma *init_tpc_mem_pkt;
971 	struct hl_cs_job *job;
972 	struct hl_cb *cb;
973 	u64 dst_addr;
974 	u32 cb_size, ctl;
975 	u8 tpc_id;
976 	int rc;
977 
978 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
979 	if (!cb)
980 		return -EFAULT;
981 
982 	init_tpc_mem_pkt = cb->kernel_address;
983 	cb_size = sizeof(*init_tpc_mem_pkt);
984 	memset(init_tpc_mem_pkt, 0, cb_size);
985 
986 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
987 
988 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
989 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
990 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
991 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
992 
993 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
994 
995 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
996 
997 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
998 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
999 				round_up(prop->sram_user_base_address, SZ_8K));
1000 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1001 
1002 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1003 	if (!job) {
1004 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1005 		rc = -ENOMEM;
1006 		goto release_cb;
1007 	}
1008 
1009 	job->id = 0;
1010 	job->user_cb = cb;
1011 	atomic_inc(&job->user_cb->cs_cnt);
1012 	job->user_cb_size = cb_size;
1013 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1014 	job->patched_cb = job->user_cb;
1015 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1016 
1017 	hl_debugfs_add_job(hdev, job);
1018 
1019 	rc = gaudi_send_job_on_qman0(hdev, job);
1020 
1021 	if (rc)
1022 		goto free_job;
1023 
1024 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1025 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1026 		if (rc)
1027 			break;
1028 	}
1029 
1030 free_job:
1031 	hl_userptr_delete_list(hdev, &job->userptr_list);
1032 	hl_debugfs_remove_job(hdev, job);
1033 	kfree(job);
1034 	atomic_dec(&cb->cs_cnt);
1035 
1036 release_cb:
1037 	hl_cb_put(cb);
1038 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1039 
1040 	return rc;
1041 }
1042 
1043 /*
1044  * gaudi_init_tpc_mem() - Initialize TPC memories.
1045  * @hdev: Pointer to hl_device structure.
1046  *
1047  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1048  *
1049  * Return: 0 for success, negative value for error.
1050  */
1051 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1052 {
1053 	const struct firmware *fw;
1054 	size_t fw_size;
1055 	void *cpu_addr;
1056 	dma_addr_t dma_handle;
1057 	int rc, count = 5;
1058 
1059 again:
1060 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1061 	if (rc == -EINTR && count-- > 0) {
1062 		msleep(50);
1063 		goto again;
1064 	}
1065 
1066 	if (rc) {
1067 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1068 				GAUDI_TPC_FW_FILE);
1069 		goto out;
1070 	}
1071 
1072 	fw_size = fw->size;
1073 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1074 	if (!cpu_addr) {
1075 		dev_err(hdev->dev,
1076 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1077 			fw_size);
1078 		rc = -ENOMEM;
1079 		goto out;
1080 	}
1081 
1082 	memcpy(cpu_addr, fw->data, fw_size);
1083 
1084 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1085 
1086 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1087 
1088 out:
1089 	release_firmware(fw);
1090 	return rc;
1091 }
1092 
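/*
 * Assign the SOBs of the stream's current SOB group to the collective slave
 * queues: one SOB per NIC engine, plus a single SOB shared by the DMA5 and
 * TPC7 reduction engine queues.
 */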
1093 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1094 {
1095 	struct gaudi_device *gaudi = hdev->asic_specific;
1096 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1097 	struct hl_hw_queue *q;
1098 	u32 i, sob_id, sob_group_id, queue_id;
1099 
1100 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1101 	sob_group_id =
1102 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1103 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1104 
1105 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1106 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1107 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1108 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1109 	}
1110 
1111 	/* Both DMA5 and TPC7 use the same resources since only a single
1112 	 * engine needs to participate in the reduction process
1113 	 */
1114 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1115 	q = &hdev->kernel_queues[queue_id];
1116 	q->sync_stream_prop.collective_sob_id =
1117 			sob_id + NIC_NUMBER_OF_ENGINES;
1118 
1119 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1120 	q = &hdev->kernel_queues[queue_id];
1121 	q->sync_stream_prop.collective_sob_id =
1122 			sob_id + NIC_NUMBER_OF_ENGINES;
1123 }
1124 
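/*
 * kref release callback for a SOB group: clear every SOB in the group by
 * writing 0 to its sync manager register, then re-arm the kref so the group
 * can be reused.
 */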
1125 static void gaudi_sob_group_hw_reset(struct kref *ref)
1126 {
1127 	struct gaudi_hw_sob_group *hw_sob_group =
1128 		container_of(ref, struct gaudi_hw_sob_group, kref);
1129 	struct hl_device *hdev = hw_sob_group->hdev;
1130 	int i;
1131 
1132 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1133 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1134 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1135 
1136 	kref_init(&hw_sob_group->kref);
1137 }
1138 
1139 static void gaudi_sob_group_reset_error(struct kref *ref)
1140 {
1141 	struct gaudi_hw_sob_group *hw_sob_group =
1142 		container_of(ref, struct gaudi_hw_sob_group, kref);
1143 	struct hl_device *hdev = hw_sob_group->hdev;
1144 
1145 	dev_crit(hdev->dev,
1146 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1147 		hw_sob_group->base_sob_id);
1148 }
1149 
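/*
 * Build the collective master SOB mask: one bit per initialized NIC engine,
 * plus one extra bit for the collective (reduction) engine.
 */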
1150 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1151 {
1152 	struct gaudi_collective_properties *prop;
1153 	int i;
1154 
1155 	prop = &gaudi->collective_props;
1156 
1157 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1158 
1159 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1160 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1161 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1162 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1163 	/* Set collective engine bit */
1164 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1165 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1166 }
1167 
1168 static int gaudi_collective_init(struct hl_device *hdev)
1169 {
1170 	u32 i, sob_id, reserved_sobs_per_group;
1171 	struct gaudi_collective_properties *prop;
1172 	struct gaudi_device *gaudi;
1173 
1174 	gaudi = hdev->asic_specific;
1175 	prop = &gaudi->collective_props;
1176 	sob_id = hdev->asic_prop.collective_first_sob;
1177 
1178 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1179 	reserved_sobs_per_group =
1180 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1181 
1182 	/* Init SOB groups */
1183 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1184 		prop->hw_sob_group[i].hdev = hdev;
1185 		prop->hw_sob_group[i].base_sob_id = sob_id;
1186 		sob_id += reserved_sobs_per_group;
1187 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1188 	}
1189 
1190 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1191 		prop->next_sob_group_val[i] = 1;
1192 		prop->curr_sob_group_idx[i] = 0;
1193 		gaudi_collective_map_sobs(hdev, i);
1194 	}
1195 
1196 	gaudi_collective_mstr_sob_mask_set(gaudi);
1197 
1198 	return 0;
1199 }
1200 
1201 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1202 {
1203 	struct gaudi_device *gaudi = hdev->asic_specific;
1204 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1205 
1206 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1207 					gaudi_sob_group_hw_reset);
1208 }
1209 
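/*
 * The collective master job waits on two monitors: the first covers the SOBs
 * of NICs 0-7 and the second covers NICs 8-9 and the reduction engine
 * (DMA5/TPC7), as described in gaudi_collective_wait_create_jobs().
 */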
1210 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1211 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1212 {
1213 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1214 	struct gaudi_collective_properties *cprop;
1215 	struct hl_gen_wait_properties wait_prop;
1216 	struct hl_sync_stream_properties *prop;
1217 	struct gaudi_device *gaudi;
1218 
1219 	gaudi = hdev->asic_specific;
1220 	cprop = &gaudi->collective_props;
1221 	queue_id = job->hw_queue_id;
1222 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1223 
1224 	master_sob_base =
1225 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1226 	master_monitor = prop->collective_mstr_mon_id[0];
1227 
1228 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1229 
1230 	dev_dbg(hdev->dev,
1231 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1232 		master_sob_base, cprop->mstr_sob_mask[0],
1233 		cprop->next_sob_group_val[stream],
1234 		master_monitor, queue_id);
1235 
1236 	wait_prop.data = (void *) job->patched_cb;
1237 	wait_prop.sob_base = master_sob_base;
1238 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1239 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1240 	wait_prop.mon_id = master_monitor;
1241 	wait_prop.q_idx = queue_id;
1242 	wait_prop.size = cb_size;
1243 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1244 
1245 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1246 	master_monitor = prop->collective_mstr_mon_id[1];
1247 
1248 	dev_dbg(hdev->dev,
1249 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1250 		master_sob_base, cprop->mstr_sob_mask[1],
1251 		cprop->next_sob_group_val[stream],
1252 		master_monitor, queue_id);
1253 
1254 	wait_prop.sob_base = master_sob_base;
1255 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1256 	wait_prop.mon_id = master_monitor;
1257 	wait_prop.size = cb_size;
1258 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1259 }
1260 
1261 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1262 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1263 {
1264 	struct hl_gen_wait_properties wait_prop;
1265 	struct hl_sync_stream_properties *prop;
1266 	u32 queue_id, cb_size = 0;
1267 
1268 	queue_id = job->hw_queue_id;
1269 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1270 
1271 	if (job->cs->encaps_signals) {
1272 		/* use the encaps signal handle stored earlier in the flow
1273 		 * and set the SOB information from the encaps
1274 		 * signals handle
1275 		 */
1276 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1277 						cs_cmpl);
1278 
1279 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1280 				job->cs->sequence,
1281 				cs_cmpl->hw_sob->sob_id,
1282 				cs_cmpl->sob_val);
1283 	}
1284 
1285 	/* Add to wait CBs using slave monitor */
1286 	wait_prop.data = (void *) job->user_cb;
1287 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1288 	wait_prop.sob_mask = 0x1;
1289 	wait_prop.sob_val = cs_cmpl->sob_val;
1290 	wait_prop.mon_id = prop->collective_slave_mon_id;
1291 	wait_prop.q_idx = queue_id;
1292 	wait_prop.size = cb_size;
1293 
1294 	dev_dbg(hdev->dev,
1295 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1296 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1297 		prop->collective_slave_mon_id, queue_id);
1298 
1299 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1300 
1301 	dev_dbg(hdev->dev,
1302 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1303 		prop->collective_sob_id, queue_id);
1304 
1305 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1306 			prop->collective_sob_id, cb_size, false);
1307 }
1308 
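/*
 * Prepare a collective wait CS: take the signal SOB information (directly or
 * from the encapsulated signals handle), get a reference on the signal SOB,
 * generate the master/slave wait CBs for every job in the CS and advance the
 * stream's SOB group bookkeeping.
 */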
1309 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1310 {
1311 	struct hl_cs_compl *signal_cs_cmpl =
1312 		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1313 	struct hl_cs_compl *cs_cmpl =
1314 		container_of(cs->fence, struct hl_cs_compl, base_fence);
1315 	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1316 	struct gaudi_collective_properties *cprop;
1317 	u32 stream, queue_id, sob_group_offset;
1318 	struct gaudi_device *gaudi;
1319 	struct hl_device *hdev;
1320 	struct hl_cs_job *job;
1321 	struct hl_ctx *ctx;
1322 
1323 	ctx = cs->ctx;
1324 	hdev = ctx->hdev;
1325 	gaudi = hdev->asic_specific;
1326 	cprop = &gaudi->collective_props;
1327 
1328 	if (cs->encaps_signals) {
1329 		cs_cmpl->hw_sob = handle->hw_sob;
1330 		/* at this checkpoint we only need the hw_sob pointer
1331 		 * for the completion check before starting to go over the jobs
1332 		 * of the master/slaves. The sob_value will be taken later on
1333 		 * in gaudi_collective_slave_init_job, depending on each
1334 		 * job's wait offset value.
1335 		 */
1336 		cs_cmpl->sob_val = 0;
1337 	} else {
1338 		/* copy the SOB id and value of the signal CS */
1339 		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1340 		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1341 	}
1342 
1343 	/* check again if the signal cs already completed.
1344 	 * if yes then don't send any wait cs since the hw_sob
1345 	 * could be in reset already. if signal is not completed
1346 	 * then get refcount to hw_sob to prevent resetting the sob
1347 	 * while wait cs is not submitted.
1348 	 * note that this check is protected by two locks,
1349 	 * hw queue lock and completion object lock,
1350 	 * and the same completion object lock also protects
1351 	 * the hw_sob reset handler function.
1352 	 * The hw_queue lock prevents the hw_sob refcount value, which is
1353 	 * changed by the signal/wait flows, from going out of sync.
1354 	 */
1355 	spin_lock(&signal_cs_cmpl->lock);
1356 
1357 	if (completion_done(&cs->signal_fence->completion)) {
1358 		spin_unlock(&signal_cs_cmpl->lock);
1359 		return -EINVAL;
1360 	}
1361 	/* Increment kref since all slave queues are now waiting on it */
1362 	kref_get(&cs_cmpl->hw_sob->kref);
1363 
1364 	spin_unlock(&signal_cs_cmpl->lock);
1365 
1366 	/* Calculate the stream from collective master queue (1st job) */
1367 	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1368 	stream = job->hw_queue_id % 4;
1369 	sob_group_offset =
1370 		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1371 
1372 	list_for_each_entry(job, &cs->job_list, cs_node) {
1373 		queue_id = job->hw_queue_id;
1374 
1375 		if (hdev->kernel_queues[queue_id].collective_mode ==
1376 				HL_COLLECTIVE_MASTER)
1377 			gaudi_collective_master_init_job(hdev, job, stream,
1378 						sob_group_offset);
1379 		else
1380 			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1381 	}
1382 
1383 	cs_cmpl->sob_group = sob_group_offset;
1384 
1385 	/* Handle sob group kref and wraparound */
1386 	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1387 	cprop->next_sob_group_val[stream]++;
1388 
1389 	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1390 		/*
1391 		 * Decrement as we reached the max value.
1392 		 * The release function won't be called here as we've
1393 		 * just incremented the refcount.
1394 		 */
1395 		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1396 				gaudi_sob_group_reset_error);
1397 		cprop->next_sob_group_val[stream] = 1;
1398 		/* only two SOBs are currently in use */
1399 		cprop->curr_sob_group_idx[stream] =
1400 			(cprop->curr_sob_group_idx[stream] + 1) &
1401 							(HL_RSVD_SOBS - 1);
1402 
1403 		gaudi_collective_map_sobs(hdev, stream);
1404 
1405 		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1406 				cprop->curr_sob_group_idx[stream], stream);
1407 	}
1408 
1409 	mb();
1410 	hl_fence_put(cs->signal_fence);
1411 	cs->signal_fence = NULL;
1412 
1413 	return 0;
1414 }
1415 
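/*
 * Extra space needed when patching a user CB: two MSG_PROT packets (for
 * completion and MSI), padded out to the next cache line when the packets
 * would otherwise cross the cache-line boundary at the end of the user CB.
 */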
1416 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1417 {
1418 	u32 cacheline_end, additional_commands;
1419 
1420 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1421 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1422 
1423 	if (user_cb_size + additional_commands > cacheline_end)
1424 		return cacheline_end - user_cb_size + additional_commands;
1425 	else
1426 		return additional_commands;
1427 }
1428 
1429 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1430 		struct hl_ctx *ctx, struct hl_cs *cs,
1431 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1432 		u32 encaps_signal_offset)
1433 {
1434 	struct hw_queue_properties *hw_queue_prop;
1435 	struct hl_cs_counters_atomic *cntr;
1436 	struct hl_cs_job *job;
1437 	struct hl_cb *cb;
1438 	u32 cb_size;
1439 	bool patched_cb;
1440 
1441 	cntr = &hdev->aggregated_cs_counters;
1442 
1443 	if (mode == HL_COLLECTIVE_MASTER) {
1444 		/* CB size of collective master queue contains
1445 		 * 4 msg short packets for monitor 1 configuration
1446 		 * 1 fence packet
1447 		 * 4 msg short packets for monitor 2 configuration
1448 		 * 1 fence packet
1449 		 * 2 msg prot packets for completion and MSI
1450 		 */
1451 		cb_size = sizeof(struct packet_msg_short) * 8 +
1452 				sizeof(struct packet_fence) * 2 +
1453 				sizeof(struct packet_msg_prot) * 2;
1454 		patched_cb = true;
1455 	} else {
1456 		/* CB size of collective slave queues contains
1457 		 * 4 msg short packets for monitor configuration
1458 		 * 1 fence packet
1459 		 * 1 additional msg short packet for sob signal
1460 		 */
1461 		cb_size = sizeof(struct packet_msg_short) * 5 +
1462 				sizeof(struct packet_fence);
1463 		patched_cb = false;
1464 	}
1465 
1466 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1467 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1468 	if (!job) {
1469 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1470 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1471 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1472 		return -ENOMEM;
1473 	}
1474 
1475 	/* Allocate internal mapped CB for non patched CBs */
1476 	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1477 	if (!cb) {
1478 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1479 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1480 		kfree(job);
1481 		return -EFAULT;
1482 	}
1483 
1484 	job->id = 0;
1485 	job->cs = cs;
1486 	job->user_cb = cb;
1487 	atomic_inc(&job->user_cb->cs_cnt);
1488 	job->user_cb_size = cb_size;
1489 	job->hw_queue_id = queue_id;
1490 
1491 	/* since it's guaranteed to have only one chunk in the collective wait
1492 	 * cs, we can use this chunk to set the encapsulated signal offset
1493 	 * in the jobs.
1494 	 */
1495 	if (cs->encaps_signals)
1496 		job->encaps_sig_wait_offset = encaps_signal_offset;
1497 
1498 	/*
1499 	 * No need for parsing, the user CB is the patched CB.
1500 	 * We call hl_cb_destroy() for two reasons - we don't need
1501 	 * the CB in the CB idr anymore and to decrement its refcount as
1502 	 * it was incremented inside hl_cb_kernel_create().
1503 	 */
1504 	if (patched_cb)
1505 		job->patched_cb = job->user_cb;
1506 	else
1507 		job->patched_cb = NULL;
1508 
1509 	job->job_cb_size = job->user_cb_size;
1510 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1511 
1512 	/* increment refcount as for external queues we get completion */
1513 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1514 		cs_get(cs);
1515 
1516 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1517 
1518 	list_add_tail(&job->cs_node, &cs->job_list);
1519 
1520 	hl_debugfs_add_job(hdev, job);
1521 
1522 	return 0;
1523 }
1524 
1525 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1526 		struct hl_ctx *ctx, struct hl_cs *cs,
1527 		u32 wait_queue_id, u32 collective_engine_id,
1528 		u32 encaps_signal_offset)
1529 {
1530 	struct gaudi_device *gaudi = hdev->asic_specific;
1531 	struct hw_queue_properties *hw_queue_prop;
1532 	u32 queue_id, collective_queue, num_jobs;
1533 	u32 stream, nic_queue, nic_idx = 0;
1534 	bool skip;
1535 	int i, rc = 0;
1536 
1537 	/* Verify wait queue id is configured as master */
1538 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1539 	if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
1540 		dev_err(hdev->dev,
1541 			"Queue %d is not configured as collective master\n",
1542 			wait_queue_id);
1543 		return -EINVAL;
1544 	}
1545 
1546 	/* Verify engine id is supported */
1547 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1548 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1549 		dev_err(hdev->dev,
1550 			"Collective wait does not support engine %u\n",
1551 			collective_engine_id);
1552 		return -EINVAL;
1553 	}
1554 
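	/* 4 streams per QMAN - derive the stream index from the wait queue id */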
1555 	stream = wait_queue_id % 4;
1556 
1557 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1558 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1559 	else
1560 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1561 
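	/* One collective master job plus NUMBER_OF_SOBS_IN_GRP slave jobs */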
1562 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1563 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1564 
1565 	/* The first job goes to the collective master queue; it will wait for
1566 	 * the collective slave queues to finish execution.
1567 	 * The synchronization is done using two monitors:
1568 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1569 	 * reduction engine (DMA5/TPC7).
1570 	 *
1571 	 * The rest of the jobs go to the collective slave queues, which will
1572 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1573 	 */
1574 	for (i = 0 ; i < num_jobs ; i++) {
1575 		if (i == 0) {
1576 			queue_id = wait_queue_id;
1577 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1578 				HL_COLLECTIVE_MASTER, queue_id,
1579 				wait_queue_id, encaps_signal_offset);
1580 		} else {
1581 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1582 				if (gaudi->hw_cap_initialized &
1583 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1584 					skip = false;
1585 				else
1586 					skip = true;
1587 
1588 				queue_id = nic_queue;
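				/* Skip to the next NIC engine (4 queues per engine) */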
1589 				nic_queue += 4;
1590 				nic_idx++;
1591 
1592 				if (skip)
1593 					continue;
1594 			} else {
1595 				queue_id = collective_queue;
1596 			}
1597 
1598 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1599 				HL_COLLECTIVE_SLAVE, queue_id,
1600 				wait_queue_id, encaps_signal_offset);
1601 		}
1602 
1603 		if (rc)
1604 			return rc;
1605 	}
1606 
1607 	return rc;
1608 }
1609 
1610 static int gaudi_late_init(struct hl_device *hdev)
1611 {
1612 	struct gaudi_device *gaudi = hdev->asic_specific;
1613 	int rc;
1614 
1615 	rc = gaudi->cpucp_info_get(hdev);
1616 	if (rc) {
1617 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1618 		return rc;
1619 	}
1620 
1621 	if ((hdev->card_type == cpucp_card_type_pci) &&
1622 			(hdev->nic_ports_mask & 0x3)) {
1623 		dev_info(hdev->dev,
1624 			"PCI card detected, only 8 ports are enabled\n");
1625 		hdev->nic_ports_mask &= ~0x3;
1626 
1627 		/* Stop and disable unused NIC QMANs */
1628 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1629 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1630 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1631 
1632 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1633 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1634 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1635 
1636 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1637 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1638 
1639 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1640 	}
1641 
1642 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1643 	if (rc) {
1644 		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
1645 		return rc;
1646 	}
1647 
1648 	/* Scrub both SRAM and DRAM */
1649 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1650 	if (rc)
1651 		goto disable_pci_access;
1652 
1653 	rc = gaudi_fetch_psoc_frequency(hdev);
1654 	if (rc) {
1655 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1656 		goto disable_pci_access;
1657 	}
1658 
1659 	rc = gaudi_mmu_clear_pgt_range(hdev);
1660 	if (rc) {
1661 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1662 		goto disable_pci_access;
1663 	}
1664 
1665 	rc = gaudi_init_tpc_mem(hdev);
1666 	if (rc) {
1667 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1668 		goto disable_pci_access;
1669 	}
1670 
1671 	rc = gaudi_collective_init(hdev);
1672 	if (rc) {
1673 		dev_err(hdev->dev, "Failed to init collective\n");
1674 		goto disable_pci_access;
1675 	}
1676 
1677 	/* We only support a single ASID for the user, so for the sake of optimization, just
1678 	 * initialize the ASID one time during device initialization with the fixed value of 1
1679 	 */
1680 	gaudi_mmu_prepare(hdev, 1);
1681 
1682 	hl_fw_set_pll_profile(hdev);
1683 
1684 	return 0;
1685 
1686 disable_pci_access:
1687 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1688 
1689 	return rc;
1690 }
1691 
1692 static void gaudi_late_fini(struct hl_device *hdev)
1693 {
1694 	hl_hwmon_release_resources(hdev);
1695 }
1696 
1697 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1698 {
1699 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1700 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1701 	int i, j, rc = 0;
1702 
1703 	/*
1704 	 * The device CPU works with 40-bit addresses, and bit 39 must be set
1705 	 * to '1' when accessing the host.
1706 	 * Bits 49:39 of the full host address are saved for a later
1707 	 * configuration of the HW to perform extension to 50 bits.
1708 	 * Because there is a single HW register that holds the extension bits,
1709 	 * these bits must be identical across the entire allocated range.
1710 	 */
1711 
1712 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1713 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1714 								&dma_addr_arr[i],
1715 								GFP_KERNEL | __GFP_ZERO);
1716 		if (!virt_addr_arr[i]) {
1717 			rc = -ENOMEM;
1718 			goto free_dma_mem_arr;
1719 		}
1720 
1721 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1722 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1723 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1724 			break;
1725 	}
1726 
1727 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1728 		dev_err(hdev->dev,
1729 			"MSBs of CPU accessible DMA memory are not identical across the allocated range\n");
1730 		rc = -EFAULT;
1731 		goto free_dma_mem_arr;
1732 	}
1733 
1734 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1735 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1736 	hdev->cpu_pci_msb_addr =
1737 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1738 
1739 	if (!hdev->asic_prop.fw_security_enabled)
1740 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1741 
1742 free_dma_mem_arr:
1743 	for (j = 0 ; j < i ; j++)
1744 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1745 						dma_addr_arr[j]);
1746 
1747 	return rc;
1748 }
1749 
1750 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1751 {
1752 	struct gaudi_device *gaudi = hdev->asic_specific;
1753 	struct gaudi_internal_qman_info *q;
1754 	u32 i;
1755 
1756 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1757 		q = &gaudi->internal_qmans[i];
1758 		if (!q->pq_kernel_addr)
1759 			continue;
1760 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1761 	}
1762 }
1763 
1764 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1765 {
1766 	struct gaudi_device *gaudi = hdev->asic_specific;
1767 	struct gaudi_internal_qman_info *q;
1768 	int rc, i;
1769 
1770 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1771 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1772 			continue;
1773 
1774 		q = &gaudi->internal_qmans[i];
1775 
1776 		switch (i) {
1777 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1778 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1779 			break;
1780 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1781 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1782 			break;
1783 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1784 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1785 			break;
1786 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1787 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1788 			break;
1789 		default:
1790 			dev_err(hdev->dev, "Bad internal queue index %d\n", i);
1791 			rc = -EINVAL;
1792 			goto free_internal_qmans_pq_mem;
1793 		}
1794 
1795 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1796 								GFP_KERNEL | __GFP_ZERO);
1797 		if (!q->pq_kernel_addr) {
1798 			rc = -ENOMEM;
1799 			goto free_internal_qmans_pq_mem;
1800 		}
1801 	}
1802 
1803 	return 0;
1804 
1805 free_internal_qmans_pq_mem:
1806 	gaudi_free_internal_qmans_pq_mem(hdev);
1807 	return rc;
1808 }
1809 
1810 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1811 {
1812 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1813 	struct pci_mem_region *region;
1814 
1815 	/* CFG */
1816 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1817 	region->region_base = CFG_BASE;
1818 	region->region_size = CFG_SIZE;
1819 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1820 	region->bar_size = CFG_BAR_SIZE;
1821 	region->bar_id = CFG_BAR_ID;
1822 	region->used = 1;
1823 
1824 	/* SRAM */
1825 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1826 	region->region_base = SRAM_BASE_ADDR;
1827 	region->region_size = SRAM_SIZE;
1828 	region->offset_in_bar = 0;
1829 	region->bar_size = SRAM_BAR_SIZE;
1830 	region->bar_id = SRAM_BAR_ID;
1831 	region->used = 1;
1832 
1833 	/* DRAM */
1834 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1835 	region->region_base = DRAM_PHYS_BASE;
1836 	region->region_size = hdev->asic_prop.dram_size;
1837 	region->offset_in_bar = 0;
1838 	region->bar_size = prop->dram_pci_bar_size;
1839 	region->bar_id = HBM_BAR_ID;
1840 	region->used = 1;
1841 
1842 	/* SP SRAM */
1843 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1844 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1845 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1846 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1847 	region->bar_size = CFG_BAR_SIZE;
1848 	region->bar_id = CFG_BAR_ID;
1849 	region->used = 1;
1850 }
1851 
1852 static int gaudi_sw_init(struct hl_device *hdev)
1853 {
1854 	struct gaudi_device *gaudi;
1855 	u32 i, event_id = 0;
1856 	int rc;
1857 
1858 	/* Allocate device structure */
1859 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1860 	if (!gaudi)
1861 		return -ENOMEM;
1862 
1863 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1864 		if (gaudi_irq_map_table[i].valid) {
1865 			if (event_id == GAUDI_EVENT_SIZE) {
1866 				dev_err(hdev->dev,
1867 					"Event array exceeds the limit of %u events\n",
1868 					GAUDI_EVENT_SIZE);
1869 				rc = -EINVAL;
1870 				goto free_gaudi_device;
1871 			}
1872 
1873 			gaudi->events[event_id++] =
1874 					gaudi_irq_map_table[i].fc_id;
1875 		}
1876 	}
1877 
1878 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1879 
1880 	hdev->asic_specific = gaudi;
1881 
1882 	/* Create DMA pool for small allocations */
1883 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1884 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1885 	if (!hdev->dma_pool) {
1886 		dev_err(hdev->dev, "failed to create DMA pool\n");
1887 		rc = -ENOMEM;
1888 		goto free_gaudi_device;
1889 	}
1890 
1891 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1892 	if (rc)
1893 		goto free_dma_pool;
1894 
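	/* ilog2(32) - 32 bytes minimum allocation granularity for this pool */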
1895 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1896 	if (!hdev->cpu_accessible_dma_pool) {
1897 		dev_err(hdev->dev,
1898 			"Failed to create CPU accessible DMA pool\n");
1899 		rc = -ENOMEM;
1900 		goto free_cpu_dma_mem;
1901 	}
1902 
1903 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1904 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1905 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1906 	if (rc) {
1907 		dev_err(hdev->dev,
1908 			"Failed to add memory to CPU accessible DMA pool\n");
1909 		rc = -EFAULT;
1910 		goto free_cpu_accessible_dma_pool;
1911 	}
1912 
1913 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1914 	if (rc)
1915 		goto free_cpu_accessible_dma_pool;
1916 
1917 	spin_lock_init(&gaudi->hw_queues_lock);
1918 
1919 	hdev->supports_sync_stream = true;
1920 	hdev->supports_coresight = true;
1921 	hdev->supports_staged_submission = true;
1922 	hdev->supports_wait_for_multi_cs = true;
1923 
1924 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1925 	hdev->stream_master_qid_arr =
1926 				hdev->asic_funcs->get_stream_master_qid_arr();
1927 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1928 
1929 	return 0;
1930 
1931 free_cpu_accessible_dma_pool:
1932 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1933 free_cpu_dma_mem:
1934 	if (!hdev->asic_prop.fw_security_enabled)
1935 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1936 					hdev->cpu_pci_msb_addr);
1937 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1938 					hdev->cpu_accessible_dma_address);
1939 free_dma_pool:
1940 	dma_pool_destroy(hdev->dma_pool);
1941 free_gaudi_device:
1942 	kfree(gaudi);
1943 	return rc;
1944 }
1945 
1946 static int gaudi_sw_fini(struct hl_device *hdev)
1947 {
1948 	struct gaudi_device *gaudi = hdev->asic_specific;
1949 
1950 	gaudi_free_internal_qmans_pq_mem(hdev);
1951 
1952 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1953 
1954 	if (!hdev->asic_prop.fw_security_enabled)
1955 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1956 					hdev->cpu_pci_msb_addr);
1957 
1958 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1959 					hdev->cpu_accessible_dma_address);
1960 
1961 	dma_pool_destroy(hdev->dma_pool);
1962 
1963 	kfree(gaudi);
1964 
1965 	return 0;
1966 }
1967 
1968 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1969 {
1970 	struct hl_device *hdev = arg;
1971 	int i;
1972 
1973 	if (hdev->disabled)
1974 		return IRQ_HANDLED;
1975 
1976 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1977 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1978 
1979 	hl_irq_handler_eq(irq, &hdev->event_queue);
1980 
1981 	return IRQ_HANDLED;
1982 }
1983 
1984 /*
1985  * For backward compatibility, new MSI interrupts should be set after the
1986  * existing CPU and NIC interrupts.
1987  */
1988 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1989 				bool cpu_eq)
1990 {
1991 	int msi_vec;
1992 
1993 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1994 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1995 				GAUDI_EVENT_QUEUE_MSI_IDX);
1996 
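	/* Vectors below the CPU EQ index map 1:1; newer interrupts are placed
	 * after the CPU EQ and the NIC vectors
	 */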
1997 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1998 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1999 
2000 	return pci_irq_vector(hdev->pdev, msi_vec);
2001 }
2002 
2003 static int gaudi_enable_msi_single(struct hl_device *hdev)
2004 {
2005 	int rc, irq;
2006 
2007 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2008 
2009 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2010 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2011 			"gaudi single msi", hdev);
2012 	if (rc)
2013 		dev_err(hdev->dev,
2014 			"Failed to request single MSI IRQ\n");
2015 
2016 	return rc;
2017 }
2018 
2019 static int gaudi_enable_msi(struct hl_device *hdev)
2020 {
2021 	struct gaudi_device *gaudi = hdev->asic_specific;
2022 	int rc;
2023 
2024 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2025 		return 0;
2026 
2027 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2028 	if (rc < 0) {
2029 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2030 		return rc;
2031 	}
2032 
2033 	rc = gaudi_enable_msi_single(hdev);
2034 	if (rc)
2035 		goto free_pci_irq_vectors;
2036 
2037 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2038 
2039 	return 0;
2040 
2041 free_pci_irq_vectors:
2042 	pci_free_irq_vectors(hdev->pdev);
2043 	return rc;
2044 }
2045 
2046 static void gaudi_sync_irqs(struct hl_device *hdev)
2047 {
2048 	struct gaudi_device *gaudi = hdev->asic_specific;
2049 
2050 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2051 		return;
2052 
2053 	/* Wait for all pending IRQs to finish */
2054 	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2055 }
2056 
2057 static void gaudi_disable_msi(struct hl_device *hdev)
2058 {
2059 	struct gaudi_device *gaudi = hdev->asic_specific;
2060 
2061 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2062 		return;
2063 
2064 	gaudi_sync_irqs(hdev);
2065 	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2066 	pci_free_irq_vectors(hdev->pdev);
2067 
2068 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2069 }
2070 
2071 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2072 {
2073 	struct gaudi_device *gaudi = hdev->asic_specific;
2074 
2075 	if (hdev->asic_prop.fw_security_enabled)
2076 		return;
2077 
2078 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2079 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2080 		return;
2081 
2082 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2083 		return;
2084 
2085 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2086 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2087 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2088 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2089 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2090 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2091 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2092 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2093 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2094 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2095 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2096 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2097 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2098 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2099 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2100 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2101 
2102 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2103 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2104 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2105 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2106 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2107 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2108 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2109 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2110 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2111 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2112 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2113 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2114 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2115 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2116 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2117 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2118 
2119 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2120 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2121 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2122 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2123 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2124 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2125 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2126 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2127 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2128 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2129 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2130 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2131 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2132 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2133 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2134 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2135 
2136 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2137 }
2138 
2139 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2140 {
2141 	struct gaudi_device *gaudi = hdev->asic_specific;
2142 
2143 	if (hdev->asic_prop.fw_security_enabled)
2144 		return;
2145 
2146 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2147 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2148 		return;
2149 
2150 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2151 		return;
2152 
2153 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2154 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2155 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2156 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2157 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2158 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2159 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2160 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2161 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2162 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2163 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2164 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2165 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2166 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2167 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2168 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169 
2170 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2171 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2172 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2173 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2174 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2175 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2176 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2177 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2178 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2179 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2180 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2181 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2182 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2183 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2184 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2185 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2186 
2187 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2188 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2189 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2190 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2191 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2192 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2193 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2194 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2195 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2196 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2197 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2198 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2199 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2200 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2201 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2202 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2203 
2204 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2205 }
2206 
2207 static void gaudi_init_e2e(struct hl_device *hdev)
2208 {
2209 	if (hdev->asic_prop.fw_security_enabled)
2210 		return;
2211 
2212 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2213 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2214 		return;
2215 
2216 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2217 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2218 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2219 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2220 
2221 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2222 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2223 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2224 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2225 
2226 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2227 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2228 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2229 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2230 
2231 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2232 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2233 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2234 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2235 
2236 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2237 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2238 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2239 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2240 
2241 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2242 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2243 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2244 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2245 
2246 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2247 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2248 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2249 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2250 
2251 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2252 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2253 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2254 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2255 
2256 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2257 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2258 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2259 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2260 
2261 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2262 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2263 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2264 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2265 
2266 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2267 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2268 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2269 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2270 
2271 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2272 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2273 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2274 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2275 
2276 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2277 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2278 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2279 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2280 
2281 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2282 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2283 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2284 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2285 
2286 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2287 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2288 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2289 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2290 
2291 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2292 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2293 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2294 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2295 
2296 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2297 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2298 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2299 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2300 
2301 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2302 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2303 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2304 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2305 
2306 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2307 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2308 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2309 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2310 
2311 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2312 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2313 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2314 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2315 
2316 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2317 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2318 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2319 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2320 
2321 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2322 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2323 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2324 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2325 
2326 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2327 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2328 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2329 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2330 
2331 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2332 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2333 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2334 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2335 
2336 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2337 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2338 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2339 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2340 
2341 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2342 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2343 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2344 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2345 
2346 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2347 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2348 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2349 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2350 
2351 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2352 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2353 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2354 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2355 
2356 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2357 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2358 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2359 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2360 
2361 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2362 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2363 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2364 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2365 
2366 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2367 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2368 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2369 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2370 
2371 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2372 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2373 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2374 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2375 
2376 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2377 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2378 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2379 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2380 
2381 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2382 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2383 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2384 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2385 
2386 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2387 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2388 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2389 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2390 
2391 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2392 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2393 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2394 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2395 
2396 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2397 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2398 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2399 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2400 
2401 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2402 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2403 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2404 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2405 
2406 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2407 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2408 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2409 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2410 
2411 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2412 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2413 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2414 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2415 
2416 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2417 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2418 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2419 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2420 
2421 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2422 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2423 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2424 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2425 
2426 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2427 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2428 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2429 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2430 
2431 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2432 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2433 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2434 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2435 
2436 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2437 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2438 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2439 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2440 
2441 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2442 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2443 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2444 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2445 
2446 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2447 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2448 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2449 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2450 
2451 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2452 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2453 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2454 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2455 }
2456 
2457 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2458 {
2459 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2460 
2461 	if (hdev->asic_prop.fw_security_enabled)
2462 		return;
2463 
2464 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2465 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2466 		return;
2467 
2468 	hbm0_wr = 0x33333333;
2469 	hbm0_rd = 0x77777777;
2470 	hbm1_wr = 0x55555555;
2471 	hbm1_rd = 0xDDDDDDDD;
2472 
2473 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2474 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2475 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2476 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2477 
2478 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2479 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2480 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2481 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2482 
2483 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2484 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2485 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2486 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2487 
2488 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2489 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2490 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2491 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2492 
2493 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2494 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2497 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2500 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2501 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2502 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2503 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2504 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2505 
2506 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2507 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2510 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2513 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2514 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2515 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2516 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2517 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2518 }
2519 
2520 static void gaudi_init_golden_registers(struct hl_device *hdev)
2521 {
2522 	u32 tpc_offset;
2523 	int tpc_id, i;
2524 
2525 	gaudi_init_e2e(hdev);
2526 	gaudi_init_hbm_cred(hdev);
2527 
2528 	for (tpc_id = 0, tpc_offset = 0;
2529 				tpc_id < TPC_NUMBER_OF_ENGINES;
2530 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2531 		/* Mask all arithmetic interrupts from TPC */
2532 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2533 		/* Set 16 cache lines */
2534 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2535 				ICACHE_FETCH_LINE_NUM, 2);
2536 	}
2537 
2538 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2539 	for (i = 0 ; i < 128 ; i += 8)
2540 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2541 
2542 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2544 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2545 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2546 }
2547 
2548 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2549 					int qman_id, dma_addr_t qman_pq_addr)
2550 {
2551 	struct cpu_dyn_regs *dyn_regs =
2552 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2553 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2554 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2555 	u32 q_off, dma_qm_offset;
2556 	u32 dma_qm_err_cfg, irq_handler_offset;
2557 
2558 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2559 
2560 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2561 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2562 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2563 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2564 	so_base_en_lo = lower_32_bits(CFG_BASE +
2565 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2566 	so_base_en_hi = upper_32_bits(CFG_BASE +
2567 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2568 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2569 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2570 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2571 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2572 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2573 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2574 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2575 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2576 
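	/* Per-stream QMAN registers are consecutive 32-bit registers (4-byte stride) */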
2577 	q_off = dma_qm_offset + qman_id * 4;
2578 
2579 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2580 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2581 
2582 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2583 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2584 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2585 
2586 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2587 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2588 							QMAN_LDMA_SRC_OFFSET);
2589 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2590 							QMAN_LDMA_DST_OFFSET);
2591 
2592 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2593 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2594 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2595 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2596 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2597 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2598 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2599 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2600 
2601 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2602 
2603 	/* The following configuration is needed only once per QMAN */
2604 	if (qman_id == 0) {
2605 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2606 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2607 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2608 
2609 		/* Configure RAZWI IRQ */
2610 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2611 		if (hdev->stop_on_err)
2612 			dma_qm_err_cfg |=
2613 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2614 
2615 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2616 
2617 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2618 			lower_32_bits(CFG_BASE + irq_handler_offset));
2619 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2620 			upper_32_bits(CFG_BASE + irq_handler_offset));
2621 
2622 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2623 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2624 									dma_id);
2625 
2626 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2627 				QM_ARB_ERR_MSG_EN_MASK);
2628 
2629 		/* Set timeout to maximum */
2630 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2631 
2632 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2633 				QMAN_EXTERNAL_MAKE_TRUSTED);
2634 
2635 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2636 	}
2637 }
2638 
2639 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2640 {
2641 	struct cpu_dyn_regs *dyn_regs =
2642 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2643 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2644 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2645 	u32 irq_handler_offset;
2646 
2647 	/* Set to maximum possible according to physical size */
2648 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2649 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2650 
2651 	/* WA for H/W bug H3-2116 */
2652 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2653 
2654 	/* STOP_ON bit means no completion is sent for the operation upon RAZWI */
2655 	if (hdev->stop_on_err)
2656 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2657 
2658 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2659 
2660 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2661 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2662 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2663 
2664 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2665 		lower_32_bits(CFG_BASE + irq_handler_offset));
2666 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2667 		upper_32_bits(CFG_BASE + irq_handler_offset));
2668 
2669 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2670 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2671 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2672 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2673 	/* If the channel is secured, it should be in MMU bypass mode */
2674 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2675 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2676 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2677 }
2678 
2679 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2680 				u32 enable_mask)
2681 {
2682 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2683 
2684 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2685 }
2686 
2687 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2688 {
2689 	struct gaudi_device *gaudi = hdev->asic_specific;
2690 	struct hl_hw_queue *q;
2691 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2692 
2693 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2694 		return;
2695 
2696 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2697 		dma_id = gaudi_dma_assignment[i];
2698 		/*
2699 		 * For queues after the CPU Q, we need to add 1 to get the
2700 		 * correct queue index. In addition, we need to account for the
2701 		 * CPU EQ and NIC IRQs in order to get the correct MSI register.
2702 		 */
2703 		if (dma_id > 1) {
2704 			cpu_skip = 1;
2705 			nic_skip = NIC_NUMBER_OF_ENGINES;
2706 		} else {
2707 			cpu_skip = 0;
2708 			nic_skip = 0;
2709 		}
2710 
2711 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2712 			q_idx = 4 * dma_id + j + cpu_skip;
2713 			q = &hdev->kernel_queues[q_idx];
2714 			q->cq_id = cq_id++;
2715 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2716 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2717 						q->bus_address);
2718 		}
2719 
2720 		gaudi_init_dma_core(hdev, dma_id);
2721 
2722 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2723 	}
2724 
2725 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2726 }
2727 
2728 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2729 					int qman_id, u64 qman_base_addr)
2730 {
2731 	struct cpu_dyn_regs *dyn_regs =
2732 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2733 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2734 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2735 	u32 dma_qm_err_cfg, irq_handler_offset;
2736 	u32 q_off, dma_qm_offset;
2737 
2738 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2739 
2740 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2741 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2742 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2743 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2744 	so_base_en_lo = lower_32_bits(CFG_BASE +
2745 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2746 	so_base_en_hi = upper_32_bits(CFG_BASE +
2747 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2748 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2749 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2750 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2751 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2752 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2753 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2754 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2755 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2756 
2757 	q_off = dma_qm_offset + qman_id * 4;
2758 
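	/* qman_id 0-3 are the upper CPs (one per stream) and get a PQ, while
	 * qman_id 4 is the lower CP
	 */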
2759 	if (qman_id < 4) {
2760 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2761 					lower_32_bits(qman_base_addr));
2762 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2763 					upper_32_bits(qman_base_addr));
2764 
2765 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2766 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2767 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2768 
2769 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2770 							QMAN_CPDMA_SIZE_OFFSET);
2771 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2772 							QMAN_CPDMA_SRC_OFFSET);
2773 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2774 							QMAN_CPDMA_DST_OFFSET);
2775 	} else {
2776 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2777 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2778 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2779 
2780 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2781 							QMAN_LDMA_SIZE_OFFSET);
2782 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2783 							QMAN_LDMA_SRC_OFFSET);
2784 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2785 							QMAN_LDMA_DST_OFFSET);
2786 
2787 		/* Configure RAZWI IRQ */
2788 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2789 		if (hdev->stop_on_err)
2790 			dma_qm_err_cfg |=
2791 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2792 
2793 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2794 
2795 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2796 			lower_32_bits(CFG_BASE + irq_handler_offset));
2797 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2798 			upper_32_bits(CFG_BASE + irq_handler_offset));
2799 
2800 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2801 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2802 									dma_id);
2803 
2804 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2805 				QM_ARB_ERR_MSG_EN_MASK);
2806 
2807 		/* Set timeout to maximum */
2808 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2809 
2810 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2811 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2812 				QMAN_INTERNAL_MAKE_TRUSTED);
2813 	}
2814 
2815 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2816 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2817 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2818 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2819 
2820 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2821 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2822 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2823 				mtr_base_ws_lo);
2824 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2825 				mtr_base_ws_hi);
2826 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2827 				so_base_ws_lo);
2828 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2829 				so_base_ws_hi);
2830 	}
2831 }
2832 
2833 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2834 {
2835 	struct gaudi_device *gaudi = hdev->asic_specific;
2836 	struct gaudi_internal_qman_info *q;
2837 	u64 qman_base_addr;
2838 	int i, j, dma_id, internal_q_index;
2839 
2840 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2841 		return;
2842 
2843 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2844 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2845 
2846 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2847 			 /*
2848 			  * Add 1 for the CPU queue in order to get the correct
2849 			  * queue number, as all internal queues are placed after it
2850 			  */
2851 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2852 
2853 			q = &gaudi->internal_qmans[internal_q_index];
2854 			qman_base_addr = (u64) q->pq_dma_addr;
2855 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2856 						qman_base_addr);
2857 		}
2858 
2859 		/* Initializing lower CP for HBM DMA QMAN */
2860 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2861 
2862 		gaudi_init_dma_core(hdev, dma_id);
2863 
2864 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2865 	}
2866 
2867 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2868 }
2869 
2870 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2871 					int qman_id, u64 qman_base_addr)
2872 {
2873 	struct cpu_dyn_regs *dyn_regs =
2874 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2875 	u32 mtr_base_lo, mtr_base_hi;
2876 	u32 so_base_lo, so_base_hi;
2877 	u32 irq_handler_offset;
2878 	u32 q_off, mme_id;
2879 	u32 mme_qm_err_cfg;
2880 
2881 	mtr_base_lo = lower_32_bits(CFG_BASE +
2882 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2883 	mtr_base_hi = upper_32_bits(CFG_BASE +
2884 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2885 	so_base_lo = lower_32_bits(CFG_BASE +
2886 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2887 	so_base_hi = upper_32_bits(CFG_BASE +
2888 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2889 
2890 	q_off = mme_offset + qman_id * 4;
2891 
2892 	if (qman_id < 4) {
2893 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2894 					lower_32_bits(qman_base_addr));
2895 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2896 					upper_32_bits(qman_base_addr));
2897 
2898 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2899 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2900 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2901 
2902 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2903 							QMAN_CPDMA_SIZE_OFFSET);
2904 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2905 							QMAN_CPDMA_SRC_OFFSET);
2906 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2907 							QMAN_CPDMA_DST_OFFSET);
2908 	} else {
2909 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2910 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2911 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2912 
2913 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2914 							QMAN_LDMA_SIZE_OFFSET);
2915 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2916 							QMAN_LDMA_SRC_OFFSET);
2917 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2918 							QMAN_LDMA_DST_OFFSET);
2919 
2920 		/* Configure RAZWI IRQ */
2921 		mme_id = mme_offset /
2922 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2923 
2924 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2925 		if (hdev->stop_on_err)
2926 			mme_qm_err_cfg |=
2927 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2928 
2929 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2930 
2931 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2932 			lower_32_bits(CFG_BASE + irq_handler_offset));
2933 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2934 			upper_32_bits(CFG_BASE + irq_handler_offset));
2935 
2936 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2937 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2938 									mme_id);
2939 
2940 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2941 				QM_ARB_ERR_MSG_EN_MASK);
2942 
2943 		/* Set timeout to maximum */
2944 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2945 
2946 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2947 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2948 				QMAN_INTERNAL_MAKE_TRUSTED);
2949 	}
2950 
2951 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2952 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2953 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2954 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2955 }
2956 
2957 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2958 {
2959 	struct gaudi_device *gaudi = hdev->asic_specific;
2960 	struct gaudi_internal_qman_info *q;
2961 	u64 qman_base_addr;
2962 	u32 mme_offset;
2963 	int i, internal_q_index;
2964 
2965 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2966 		return;
2967 
2968 	/*
2969 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2970 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2971 	 */
2972 
2973 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2974 
2975 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2976 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2977 		q = &gaudi->internal_qmans[internal_q_index];
2978 		qman_base_addr = (u64) q->pq_dma_addr;
2979 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2980 					qman_base_addr);
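		/* After the 4 streams of the N_W MME QMAN (MME2), move on to
		 * the S_W MME QMAN (MME0)
		 */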
2981 		if (i == 3)
2982 			mme_offset = 0;
2983 	}
2984 
2985 	/* Initializing lower CP for MME QMANs */
2986 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2987 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2988 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2989 
2990 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2991 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2992 
2993 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2994 }
2995 
2996 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2997 				int qman_id, u64 qman_base_addr)
2998 {
2999 	struct cpu_dyn_regs *dyn_regs =
3000 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3001 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3002 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3003 	u32 tpc_qm_err_cfg, irq_handler_offset;
3004 	u32 q_off, tpc_id;
3005 
3006 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3007 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3008 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3009 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3010 	so_base_en_lo = lower_32_bits(CFG_BASE +
3011 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3012 	so_base_en_hi = upper_32_bits(CFG_BASE +
3013 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3014 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3015 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3016 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3017 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3018 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3019 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3020 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3021 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3022 
3023 	q_off = tpc_offset + qman_id * 4;
3024 
3025 	tpc_id = tpc_offset /
3026 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3027 
3028 	if (qman_id < 4) {
3029 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3030 					lower_32_bits(qman_base_addr));
3031 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3032 					upper_32_bits(qman_base_addr));
3033 
3034 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3035 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3036 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3037 
3038 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3039 							QMAN_CPDMA_SIZE_OFFSET);
3040 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3041 							QMAN_CPDMA_SRC_OFFSET);
3042 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3043 							QMAN_CPDMA_DST_OFFSET);
3044 	} else {
3045 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3046 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3047 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3048 
3049 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3050 							QMAN_LDMA_SIZE_OFFSET);
3051 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3052 							QMAN_LDMA_SRC_OFFSET);
3053 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3054 							QMAN_LDMA_DST_OFFSET);
3055 
3056 		/* Configure RAZWI IRQ */
3057 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3058 		if (hdev->stop_on_err)
3059 			tpc_qm_err_cfg |=
3060 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3061 
3062 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3063 
3064 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3065 			lower_32_bits(CFG_BASE + irq_handler_offset));
3066 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3067 			upper_32_bits(CFG_BASE + irq_handler_offset));
3068 
3069 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3070 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3071 									tpc_id);
3072 
3073 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3074 				QM_ARB_ERR_MSG_EN_MASK);
3075 
3076 		/* Set timeout to maximum */
3077 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3078 
3079 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3080 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3081 				QMAN_INTERNAL_MAKE_TRUSTED);
3082 	}
3083 
3084 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3085 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3086 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3087 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3088 
3089 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3090 	if (tpc_id == 6) {
3091 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3092 				mtr_base_ws_lo);
3093 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3094 				mtr_base_ws_hi);
3095 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3096 				so_base_ws_lo);
3097 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3098 				so_base_ws_hi);
3099 	}
3100 }
3101 
3102 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3103 {
3104 	struct gaudi_device *gaudi = hdev->asic_specific;
3105 	struct gaudi_internal_qman_info *q;
3106 	u64 qman_base_addr;
3107 	u32 so_base_hi, tpc_offset = 0;
3108 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3109 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3110 	int i, tpc_id, internal_q_index;
3111 
3112 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3113 		return;
3114 
3115 	so_base_hi = upper_32_bits(CFG_BASE +
3116 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3117 
3118 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3119 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3120 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3121 						tpc_id * QMAN_STREAMS + i;
3122 			q = &gaudi->internal_qmans[internal_q_index];
3123 			qman_base_addr = (u64) q->pq_dma_addr;
3124 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3125 						qman_base_addr);
3126 
3127 			if (i == 3) {
3128 				/* Initializing lower CP for TPC QMAN */
3129 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3130 
3131 				/* Enable the QMAN and TPC channel */
3132 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3133 						QMAN_TPC_ENABLE);
3134 			}
3135 		}
3136 
3137 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3138 				so_base_hi);
3139 
3140 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3141 
3142 		gaudi->hw_cap_initialized |=
3143 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3144 	}
3145 }
3146 
3147 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3148 				int qman_id, u64 qman_base_addr, int nic_id)
3149 {
3150 	struct cpu_dyn_regs *dyn_regs =
3151 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3152 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3153 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3154 	u32 nic_qm_err_cfg, irq_handler_offset;
3155 	u32 q_off;
3156 
3157 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3158 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3159 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3160 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3161 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3162 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3163 	so_base_en_hi = upper_32_bits(CFG_BASE +
3164 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3165 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3166 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3167 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3168 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3169 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3170 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3171 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3172 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3173 
3174 	q_off = nic_offset + qman_id * 4;
3175 
3176 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3177 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3178 
3179 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3180 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3181 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3182 
3183 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3184 							QMAN_LDMA_SIZE_OFFSET);
3185 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3186 							QMAN_LDMA_SRC_OFFSET);
3187 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3188 							QMAN_LDMA_DST_OFFSET);
3189 
3190 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3191 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3192 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3193 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3194 
3195 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3196 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3197 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3198 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3199 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3200 
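	/* Per-QMAN error/arbitration registers are programmed only once, on stream 0 */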
3201 	if (qman_id == 0) {
3202 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3203 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3204 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3205 
3206 		/* Configure RAZWI IRQ */
3207 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3208 		if (hdev->stop_on_err)
3209 			nic_qm_err_cfg |=
3210 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3211 
3212 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3213 
3214 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3215 			lower_32_bits(CFG_BASE + irq_handler_offset));
3216 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3217 			upper_32_bits(CFG_BASE + irq_handler_offset));
3218 
3219 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3220 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3221 									nic_id);
3222 
3223 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3224 				QM_ARB_ERR_MSG_EN_MASK);
3225 
3226 		/* Set timeout to maximum */
3227 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3228 
3229 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3230 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3231 				QMAN_INTERNAL_MAKE_TRUSTED);
3232 	}
3233 }
3234 
3235 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3236 {
3237 	struct gaudi_device *gaudi = hdev->asic_specific;
3238 	struct gaudi_internal_qman_info *q;
3239 	u64 qman_base_addr;
3240 	u32 nic_offset = 0;
3241 	u32 nic_delta_between_qmans =
3242 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3243 	u32 nic_delta_between_nics =
3244 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3245 	int i, nic_id, internal_q_index;
3246 
3247 	if (!hdev->nic_ports_mask)
3248 		return;
3249 
3250 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3251 		return;
3252 
3253 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3254 
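	/*
	 * Each NIC macro holds two QMANs, so after every odd-numbered port
	 * the offset jumps to the next NIC block
	 */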
3255 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3256 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3257 			nic_offset += nic_delta_between_qmans;
3258 			if (nic_id & 1) {
3259 				nic_offset -= (nic_delta_between_qmans * 2);
3260 				nic_offset += nic_delta_between_nics;
3261 			}
3262 			continue;
3263 		}
3264 
3265 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3266 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3267 						nic_id * QMAN_STREAMS + i;
3268 			q = &gaudi->internal_qmans[internal_q_index];
3269 			qman_base_addr = (u64) q->pq_dma_addr;
3270 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3271 						qman_base_addr, nic_id);
3272 		}
3273 
3274 		/* Enable the QMAN */
3275 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3276 
3277 		nic_offset += nic_delta_between_qmans;
3278 		if (nic_id & 1) {
3279 			nic_offset -= (nic_delta_between_qmans * 2);
3280 			nic_offset += nic_delta_between_nics;
3281 		}
3282 
3283 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3284 	}
3285 }
3286 
3287 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3288 {
3289 	struct gaudi_device *gaudi = hdev->asic_specific;
3290 
3291 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3292 		return;
3293 
3294 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3295 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3296 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3297 }
3298 
3299 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3300 {
3301 	struct gaudi_device *gaudi = hdev->asic_specific;
3302 
3303 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3304 		return;
3305 
3306 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3307 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3308 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3309 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3310 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3311 }
3312 
3313 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3314 {
3315 	struct gaudi_device *gaudi = hdev->asic_specific;
3316 
3317 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3318 		return;
3319 
3320 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3321 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3322 }
3323 
3324 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3325 {
3326 	struct gaudi_device *gaudi = hdev->asic_specific;
3327 	u32 tpc_offset = 0;
3328 	int tpc_id;
3329 
3330 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3331 		return;
3332 
3333 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3334 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3335 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3336 	}
3337 }
3338 
3339 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3340 {
3341 	struct gaudi_device *gaudi = hdev->asic_specific;
3342 	u32 nic_mask, nic_offset = 0;
3343 	u32 nic_delta_between_qmans =
3344 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3345 	u32 nic_delta_between_nics =
3346 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3347 	int nic_id;
3348 
3349 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3350 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3351 
3352 		if (gaudi->hw_cap_initialized & nic_mask)
3353 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3354 
3355 		nic_offset += nic_delta_between_qmans;
3356 		if (nic_id & 1) {
3357 			nic_offset -= (nic_delta_between_qmans * 2);
3358 			nic_offset += nic_delta_between_nics;
3359 		}
3360 	}
3361 }
3362 
3363 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3364 {
3365 	struct gaudi_device *gaudi = hdev->asic_specific;
3366 
3367 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3368 		return;
3369 
3370 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3371 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3372 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3373 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3374 }
3375 
3376 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3377 {
3378 	struct gaudi_device *gaudi = hdev->asic_specific;
3379 
3380 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3381 		return;
3382 
3383 	/* Stop CPs of HBM DMA QMANs */
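	/* 0x1F stops the four upper CPs as well as the lower CP */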
3384 
3385 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3388 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3389 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3390 }
3391 
3392 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3393 {
3394 	struct gaudi_device *gaudi = hdev->asic_specific;
3395 
3396 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3397 		return;
3398 
3399 	/* Stop CPs of MME QMANs */
3400 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3401 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3402 }
3403 
3404 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3405 {
3406 	struct gaudi_device *gaudi = hdev->asic_specific;
3407 
3408 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3409 		return;
3410 
3411 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3417 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3418 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3419 }
3420 
3421 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3422 {
3423 	struct gaudi_device *gaudi = hdev->asic_specific;
3424 
3425 	/* Stop upper CPs of QMANs */
3426 
3427 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3428 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3429 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3430 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3431 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3432 
3433 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3434 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3435 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3436 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3437 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3438 
3439 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3440 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3441 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3442 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3443 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3444 
3445 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3446 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3447 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3448 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3449 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3450 
3451 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3452 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3453 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3454 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3455 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3456 
3457 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3458 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3459 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3460 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3461 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3462 
3463 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3464 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3465 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3466 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3467 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3468 
3469 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3470 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3471 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3472 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3473 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3474 
3475 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3476 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3477 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3478 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3479 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3480 
3481 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3482 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3483 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3484 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3485 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3486 }
3487 
3488 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3489 {
3490 	struct gaudi_device *gaudi = hdev->asic_specific;
3491 
3492 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3493 		return;
3494 
3495 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3496 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3497 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3498 }
3499 
3500 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3501 {
3502 	struct gaudi_device *gaudi = hdev->asic_specific;
3503 
3504 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3505 		return;
3506 
3507 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3510 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3511 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3512 }
3513 
3514 static void gaudi_mme_stall(struct hl_device *hdev)
3515 {
3516 	struct gaudi_device *gaudi = hdev->asic_specific;
3517 
3518 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3519 		return;
3520 
3521 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3522 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3535 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3536 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3537 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3538 }
3539 
3540 static void gaudi_tpc_stall(struct hl_device *hdev)
3541 {
3542 	struct gaudi_device *gaudi = hdev->asic_specific;
3543 
3544 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3545 		return;
3546 
3547 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3553 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3554 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3555 }
3556 
3557 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3558 {
3559 	u32 qman_offset;
3560 	int i;
3561 
3562 	if (hdev->asic_prop.fw_security_enabled)
3563 		return;
3564 
3565 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3566 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3567 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3568 
3569 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3570 	}
3571 
3572 	WREG32(mmMME0_QM_CGM_CFG, 0);
3573 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3574 	WREG32(mmMME2_QM_CGM_CFG, 0);
3575 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3576 
3577 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3578 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3579 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3580 
3581 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3582 	}
3583 }
3584 
3585 static void gaudi_enable_timestamp(struct hl_device *hdev)
3586 {
3587 	/* Disable the timestamp counter */
3588 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3589 
3590 	/* Zero the lower/upper parts of the 64-bit counter */
3591 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3592 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3593 
3594 	/* Enable the counter */
3595 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3596 }
3597 
3598 static void gaudi_disable_timestamp(struct hl_device *hdev)
3599 {
3600 	/* Disable the timestamp counter */
3601 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3602 }
3603 
3604 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3605 {
3606 	u32 wait_timeout_ms;
3607 
3608 	if (hdev->pldm)
3609 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3610 	else
3611 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3612 
3613 	if (fw_reset)
3614 		goto skip_engines;
3615 
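	/*
	 * Halt order: stop the QMANs, let them drain, stall the engines and
	 * only then disable everything
	 */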
3616 	gaudi_stop_nic_qmans(hdev);
3617 	gaudi_stop_mme_qmans(hdev);
3618 	gaudi_stop_tpc_qmans(hdev);
3619 	gaudi_stop_hbm_dma_qmans(hdev);
3620 	gaudi_stop_pci_dma_qmans(hdev);
3621 
3622 	msleep(wait_timeout_ms);
3623 
3624 	gaudi_pci_dma_stall(hdev);
3625 	gaudi_hbm_dma_stall(hdev);
3626 	gaudi_tpc_stall(hdev);
3627 	gaudi_mme_stall(hdev);
3628 
3629 	msleep(wait_timeout_ms);
3630 
3631 	gaudi_disable_nic_qmans(hdev);
3632 	gaudi_disable_mme_qmans(hdev);
3633 	gaudi_disable_tpc_qmans(hdev);
3634 	gaudi_disable_hbm_dma_qmans(hdev);
3635 	gaudi_disable_pci_dma_qmans(hdev);
3636 
3637 	gaudi_disable_timestamp(hdev);
3638 
3639 skip_engines:
3640 	gaudi_disable_msi(hdev);
3641 }
3642 
3643 static int gaudi_mmu_init(struct hl_device *hdev)
3644 {
3645 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3646 	struct gaudi_device *gaudi = hdev->asic_specific;
3647 	u64 hop0_addr;
3648 	int rc, i;
3649 
3650 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3651 		return 0;
3652 
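	/* Program the hop0 page-table address for every ASID */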
3653 	for (i = 0 ; i < prop->max_asid ; i++) {
3654 		hop0_addr = prop->mmu_pgt_addr +
3655 				(i * prop->mmu_hop_table_size);
3656 
3657 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3658 		if (rc) {
3659 			dev_err(hdev->dev,
3660 				"failed to set hop0 addr for asid %d\n", i);
3661 			return rc;
3662 		}
3663 	}
3664 
3665 	/* Init the MMU cache management page */
3666 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3667 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3668 
3669 	/* mem cache invalidation */
3670 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3671 
3672 	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3673 	if (rc)
3674 		return rc;
3675 
3676 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3677 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3678 
3679 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3680 
3681 	/*
3682 	 * The H/W expects the first PI after init to be 1. After wraparound
3683 	 * we'll write 0.
3684 	 */
3685 	gaudi->mmu_cache_inv_pi = 1;
3686 
3687 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3688 
3689 	return 0;
3690 }
3691 
3692 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3693 {
3694 	void __iomem *dst;
3695 
3696 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3697 
3698 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3699 }
3700 
3701 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3702 {
3703 	void __iomem *dst;
3704 
3705 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3706 
3707 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3708 }
3709 
3710 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3711 {
3712 	struct dynamic_fw_load_mgr *dynamic_loader;
3713 	struct cpu_dyn_regs *dyn_regs;
3714 
3715 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3716 
3717 	/*
3718 	 * Here we set initial values for a few specific dynamic registers.
3719 	 * Before the first descriptor is read from the FW, these values have
3720 	 * to be hard-coded. In later stages of the protocol they are updated
3721 	 * automatically by reading the FW descriptor, so the data there is
3722 	 * always up-to-date.
3723 	 */
3724 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3725 	dyn_regs->kmd_msg_to_cpu =
3726 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3727 	dyn_regs->cpu_cmd_status_to_host =
3728 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3729 
3730 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3731 }
3732 
3733 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3734 {
3735 	struct static_fw_load_mgr *static_loader;
3736 
3737 	static_loader = &hdev->fw_loader.static_loader;
3738 
3739 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3740 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3741 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3742 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3743 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3744 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3745 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3746 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3747 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3748 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3749 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3750 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3751 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3752 			GAUDI_PLDM_RESET_WAIT_MSEC :
3753 			GAUDI_CPU_RESET_WAIT_MSEC;
3754 }
3755 
3756 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3757 {
3758 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3759 
3760 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3761 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3762 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3763 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3764 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3765 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3766 }
3767 
3768 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3769 {
3770 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3771 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3772 
3773 	/* fill common fields */
3774 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3775 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3776 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3777 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3778 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3779 	fw_loader->skip_bmc = !hdev->bmc_enable;
3780 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3781 	fw_loader->dram_bar_id = HBM_BAR_ID;
3782 
3783 	if (prop->dynamic_fw_load)
3784 		gaudi_init_dynamic_firmware_loader(hdev);
3785 	else
3786 		gaudi_init_static_firmware_loader(hdev);
3787 }
3788 
3789 static int gaudi_init_cpu(struct hl_device *hdev)
3790 {
3791 	struct gaudi_device *gaudi = hdev->asic_specific;
3792 	int rc;
3793 
3794 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3795 		return 0;
3796 
3797 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3798 		return 0;
3799 
3800 	/*
3801 	 * The device CPU works with 40-bit addresses.
3802 	 * This register sets the extension to 50 bits.
3803 	 */
3804 	if (!hdev->asic_prop.fw_security_enabled)
3805 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3806 
3807 	rc = hl_fw_init_cpu(hdev);
3808 
3809 	if (rc)
3810 		return rc;
3811 
3812 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3813 
3814 	return 0;
3815 }
3816 
3817 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3818 {
3819 	struct cpu_dyn_regs *dyn_regs =
3820 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3821 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3822 	struct gaudi_device *gaudi = hdev->asic_specific;
3823 	u32 status, irq_handler_offset;
3824 	struct hl_eq *eq;
3825 	struct hl_hw_queue *cpu_pq =
3826 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3827 	int err;
3828 
3829 	if (!hdev->cpu_queues_enable)
3830 		return 0;
3831 
3832 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3833 		return 0;
3834 
3835 	eq = &hdev->event_queue;
3836 
3837 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3838 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3839 
3840 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3841 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3842 
3843 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3844 			lower_32_bits(hdev->cpu_accessible_dma_address));
3845 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3846 			upper_32_bits(hdev->cpu_accessible_dma_address));
3847 
3848 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3849 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3850 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3851 
3852 	/* Used for EQ CI */
3853 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3854 
3855 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3856 
3857 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
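	/*
	 * Signal the device CPU that the queues are configured, kick it via
	 * the PI-update interrupt and wait for it to acknowledge
	 */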
3858 
3859 	irq_handler_offset = prop->gic_interrupts_enable ?
3860 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3861 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3862 
3863 	WREG32(irq_handler_offset,
3864 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3865 
3866 	err = hl_poll_timeout(
3867 		hdev,
3868 		mmCPU_IF_QUEUE_INIT,
3869 		status,
3870 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3871 		1000,
3872 		cpu_timeout);
3873 
3874 	if (err) {
3875 		dev_err(hdev->dev,
3876 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3877 		return -EIO;
3878 	}
3879 
3880 	/* update FW application security bits */
3881 	if (prop->fw_cpu_boot_dev_sts0_valid)
3882 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3883 	if (prop->fw_cpu_boot_dev_sts1_valid)
3884 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3885 
3886 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3887 	return 0;
3888 }
3889 
3890 static void gaudi_pre_hw_init(struct hl_device *hdev)
3891 {
3892 	/* Perform read from the device to make sure device is up */
3893 	RREG32(mmHW_STATE);
3894 
3895 	if (!hdev->asic_prop.fw_security_enabled) {
3896 		/* Set the access through PCI bars (Linux driver only) as
3897 		 * secured
3898 		 */
3899 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3900 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3901 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3902 
3903 		/* Perform read to flush the waiting writes to ensure
3904 		 * configuration was set in the device
3905 		 */
3906 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3907 	}
3908 
3909 	/*
3910 	 * Let's mark in the H/W that we have reached this point. We check
3911 	 * this value in the reset_before_init function to understand whether
3912 	 * we need to reset the chip before doing H/W init. This register is
3913 	 * cleared by the H/W upon H/W reset
3914 	 */
3915 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3916 }
3917 
3918 static int gaudi_hw_init(struct hl_device *hdev)
3919 {
3920 	struct gaudi_device *gaudi = hdev->asic_specific;
3921 	int rc;
3922 
3923 	gaudi_pre_hw_init(hdev);
3924 
3925 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3926 	 * So we set it here and if anyone tries to move it later to
3927 	 * a different address, there will be an error
3928 	 */
3929 	if (hdev->asic_prop.iatu_done_by_fw)
3930 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3931 
3932 	/*
3933 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3934 	 * base address of dram
3935 	 */
3936 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3937 		dev_err(hdev->dev,
3938 			"failed to map HBM bar to DRAM base address\n");
3939 		return -EIO;
3940 	}
3941 
3942 	rc = gaudi_init_cpu(hdev);
3943 	if (rc) {
3944 		dev_err(hdev->dev, "failed to initialize CPU\n");
3945 		return rc;
3946 	}
3947 
3948 	/* In case the clock gating was enabled in preboot we need to disable
3949 	 * it here before touching the MME/TPC registers.
3950 	 */
3951 	gaudi_disable_clock_gating(hdev);
3952 
3953 	/* SRAM scrambler must be initialized after CPU is running from HBM */
3954 	gaudi_init_scrambler_sram(hdev);
3955 
3956 	/* This is here just in case we are working without CPU */
3957 	gaudi_init_scrambler_hbm(hdev);
3958 
3959 	gaudi_init_golden_registers(hdev);
3960 
3961 	rc = gaudi_mmu_init(hdev);
3962 	if (rc)
3963 		return rc;
3964 
3965 	gaudi_init_security(hdev);
3966 
3967 	gaudi_init_pci_dma_qmans(hdev);
3968 
3969 	gaudi_init_hbm_dma_qmans(hdev);
3970 
3971 	gaudi_init_mme_qmans(hdev);
3972 
3973 	gaudi_init_tpc_qmans(hdev);
3974 
3975 	gaudi_init_nic_qmans(hdev);
3976 
3977 	gaudi_enable_timestamp(hdev);
3978 
3979 	/* MSI must be enabled before CPU queues and NIC are initialized */
3980 	rc = gaudi_enable_msi(hdev);
3981 	if (rc)
3982 		goto disable_queues;
3983 
3984 	/* must be called after MSI was enabled */
3985 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3986 	if (rc) {
3987 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3988 			rc);
3989 		goto disable_msi;
3990 	}
3991 
3992 	/* Perform read from the device to flush all configuration */
3993 	RREG32(mmHW_STATE);
3994 
3995 	return 0;
3996 
3997 disable_msi:
3998 	gaudi_disable_msi(hdev);
3999 disable_queues:
4000 	gaudi_disable_mme_qmans(hdev);
4001 	gaudi_disable_pci_dma_qmans(hdev);
4002 
4003 	return rc;
4004 }
4005 
4006 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4007 {
4008 	struct cpu_dyn_regs *dyn_regs =
4009 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4010 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4011 	struct gaudi_device *gaudi = hdev->asic_specific;
4012 	bool driver_performs_reset;
4013 
4014 	if (!hard_reset) {
4015 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4016 		return 0;
4017 	}
4018 
4019 	if (hdev->pldm) {
4020 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4021 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4022 	} else {
4023 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4024 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4025 	}
4026 
4027 	if (fw_reset) {
4028 		dev_dbg(hdev->dev,
4029 			"Firmware performs HARD reset, going to wait %dms\n",
4030 			reset_timeout_ms);
4031 
4032 		goto skip_reset;
4033 	}
4034 
4035 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4036 					!hdev->asic_prop.hard_reset_done_by_fw);
4037 
4038 	/* Set the device to handle FLR by H/W, as we are going to put the
4039 	 * device CPU into halt mode
4040 	 */
4041 	if (driver_performs_reset)
4042 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4043 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4044 
4045 	/* If Linux is loaded on the device CPU we need to communicate with it
4046 	 * via the GIC. Otherwise, we need to use COMMS, or the MSG_TO_CPU
4047 	 * registers in the case of old F/Ws
4048 	 */
4049 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4050 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4051 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4052 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4053 
4054 		WREG32(irq_handler_offset,
4055 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4056 
4057 		/* This is a hail-mary attempt to revive the card on the small chance that the
4058 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4059 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4060 		 * reset as if Linux wasn't loaded.
4061 		 *
4062 		 * We do it only if the reset cause was HB, because that would be the indication
4063 		 * of such an event.
4064 		 *
4065 		 * If the watchdog hasn't expired but we still got HB, this won't do any
4066 		 * damage.
4067 		 */
4068 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4069 			if (hdev->asic_prop.hard_reset_done_by_fw)
4070 				hl_fw_ask_hard_reset_without_linux(hdev);
4071 			else
4072 				hl_fw_ask_halt_machine_without_linux(hdev);
4073 		}
4074 	} else {
4075 		if (hdev->asic_prop.hard_reset_done_by_fw)
4076 			hl_fw_ask_hard_reset_without_linux(hdev);
4077 		else
4078 			hl_fw_ask_halt_machine_without_linux(hdev);
4079 	}
4080 
4081 	if (driver_performs_reset) {
4082 
4083 		/* Configure the reset registers. Must be done as early as
4084 		 * possible in case we fail during H/W initialization
4085 		 */
4086 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4087 						(CFG_RST_H_DMA_MASK |
4088 						CFG_RST_H_MME_MASK |
4089 						CFG_RST_H_SM_MASK |
4090 						CFG_RST_H_TPC_7_MASK));
4091 
4092 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4093 
4094 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4095 						(CFG_RST_H_HBM_MASK |
4096 						CFG_RST_H_TPC_7_MASK |
4097 						CFG_RST_H_NIC_MASK |
4098 						CFG_RST_H_SM_MASK |
4099 						CFG_RST_H_DMA_MASK |
4100 						CFG_RST_H_MME_MASK |
4101 						CFG_RST_H_CPU_MASK |
4102 						CFG_RST_H_MMU_MASK));
4103 
4104 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4105 						(CFG_RST_L_IF_MASK |
4106 						CFG_RST_L_PSOC_MASK |
4107 						CFG_RST_L_TPC_MASK));
4108 
4109 		msleep(cpu_timeout_ms);
4110 
4111 		/* Tell ASIC not to re-initialize PCIe */
4112 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4113 
4114 		/* Restart BTL/BLR upon hard-reset */
4115 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4116 
4117 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4118 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4119 
4120 		dev_dbg(hdev->dev,
4121 			"Issued HARD reset command, going to wait %dms\n",
4122 			reset_timeout_ms);
4123 	} else {
4124 		dev_dbg(hdev->dev,
4125 			"Firmware performs HARD reset, going to wait %dms\n",
4126 			reset_timeout_ms);
4127 	}
4128 
4129 skip_reset:
4130 	/*
4131 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4132 	 * itself is in reset. We need to wait until the reset is deasserted
4133 	 */
4134 	msleep(reset_timeout_ms);
4135 
4136 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4137 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4138 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4139 		return -ETIMEDOUT;
4140 	}
4141 
4142 	if (gaudi) {
4143 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4144 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4145 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4146 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4147 						HW_CAP_HBM_SCRAMBLER);
4148 
4149 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4150 
4151 		hdev->device_cpu_is_halted = false;
4152 	}
4153 	return 0;
4154 }
4155 
4156 static int gaudi_suspend(struct hl_device *hdev)
4157 {
4158 	int rc;
4159 
4160 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4161 	if (rc)
4162 		dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
4163 
4164 	return rc;
4165 }
4166 
4167 static int gaudi_resume(struct hl_device *hdev)
4168 {
4169 	return gaudi_init_iatu(hdev);
4170 }
4171 
4172 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4173 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4174 {
4175 	int rc;
4176 
4177 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4178 			VM_DONTCOPY | VM_NORESERVE);
4179 
4180 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4181 				(dma_addr - HOST_PHYS_BASE), size);
4182 	if (rc)
4183 		dev_err(hdev->dev, "dma_mmap_coherent error %d\n", rc);
4184 
4185 	return rc;
4186 }
4187 
4188 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4189 {
4190 	struct cpu_dyn_regs *dyn_regs =
4191 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4192 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4193 	struct gaudi_device *gaudi = hdev->asic_specific;
4194 	bool invalid_queue = false;
4195 	int dma_id;
4196 
4197 	switch (hw_queue_id) {
4198 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4199 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4200 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4201 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4202 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4203 		break;
4204 
4205 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4206 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4207 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4208 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4209 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4210 		break;
4211 
4212 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4213 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4214 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
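		/*
		 * Queue IDs from DMA_2 onward are off by one because the CPU PQ
		 * sits between DMA_1_3 and DMA_2_0
		 */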
4215 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4216 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4217 		break;
4218 
4219 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4220 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4221 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4222 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4223 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4224 		break;
4225 
4226 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4227 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4228 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4229 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4230 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4231 		break;
4232 
4233 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4234 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4235 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4236 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4237 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4238 		break;
4239 
4240 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4241 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4242 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4243 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4244 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4245 		break;
4246 
4247 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4248 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4249 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4250 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4251 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4252 		break;
4253 
4254 	case GAUDI_QUEUE_ID_CPU_PQ:
4255 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4256 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4257 		else
4258 			invalid_queue = true;
4259 		break;
4260 
4261 	case GAUDI_QUEUE_ID_MME_0_0:
4262 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4263 		break;
4264 
4265 	case GAUDI_QUEUE_ID_MME_0_1:
4266 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4267 		break;
4268 
4269 	case GAUDI_QUEUE_ID_MME_0_2:
4270 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4271 		break;
4272 
4273 	case GAUDI_QUEUE_ID_MME_0_3:
4274 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4275 		break;
4276 
4277 	case GAUDI_QUEUE_ID_MME_1_0:
4278 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4279 		break;
4280 
4281 	case GAUDI_QUEUE_ID_MME_1_1:
4282 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4283 		break;
4284 
4285 	case GAUDI_QUEUE_ID_MME_1_2:
4286 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4287 		break;
4288 
4289 	case GAUDI_QUEUE_ID_MME_1_3:
4290 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4291 		break;
4292 
4293 	case GAUDI_QUEUE_ID_TPC_0_0:
4294 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4295 		break;
4296 
4297 	case GAUDI_QUEUE_ID_TPC_0_1:
4298 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4299 		break;
4300 
4301 	case GAUDI_QUEUE_ID_TPC_0_2:
4302 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4303 		break;
4304 
4305 	case GAUDI_QUEUE_ID_TPC_0_3:
4306 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4307 		break;
4308 
4309 	case GAUDI_QUEUE_ID_TPC_1_0:
4310 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4311 		break;
4312 
4313 	case GAUDI_QUEUE_ID_TPC_1_1:
4314 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4315 		break;
4316 
4317 	case GAUDI_QUEUE_ID_TPC_1_2:
4318 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4319 		break;
4320 
4321 	case GAUDI_QUEUE_ID_TPC_1_3:
4322 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4323 		break;
4324 
4325 	case GAUDI_QUEUE_ID_TPC_2_0:
4326 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4327 		break;
4328 
4329 	case GAUDI_QUEUE_ID_TPC_2_1:
4330 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4331 		break;
4332 
4333 	case GAUDI_QUEUE_ID_TPC_2_2:
4334 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4335 		break;
4336 
4337 	case GAUDI_QUEUE_ID_TPC_2_3:
4338 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4339 		break;
4340 
4341 	case GAUDI_QUEUE_ID_TPC_3_0:
4342 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4343 		break;
4344 
4345 	case GAUDI_QUEUE_ID_TPC_3_1:
4346 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4347 		break;
4348 
4349 	case GAUDI_QUEUE_ID_TPC_3_2:
4350 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4351 		break;
4352 
4353 	case GAUDI_QUEUE_ID_TPC_3_3:
4354 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4355 		break;
4356 
4357 	case GAUDI_QUEUE_ID_TPC_4_0:
4358 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4359 		break;
4360 
4361 	case GAUDI_QUEUE_ID_TPC_4_1:
4362 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4363 		break;
4364 
4365 	case GAUDI_QUEUE_ID_TPC_4_2:
4366 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4367 		break;
4368 
4369 	case GAUDI_QUEUE_ID_TPC_4_3:
4370 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4371 		break;
4372 
4373 	case GAUDI_QUEUE_ID_TPC_5_0:
4374 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4375 		break;
4376 
4377 	case GAUDI_QUEUE_ID_TPC_5_1:
4378 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4379 		break;
4380 
4381 	case GAUDI_QUEUE_ID_TPC_5_2:
4382 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4383 		break;
4384 
4385 	case GAUDI_QUEUE_ID_TPC_5_3:
4386 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4387 		break;
4388 
4389 	case GAUDI_QUEUE_ID_TPC_6_0:
4390 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4391 		break;
4392 
4393 	case GAUDI_QUEUE_ID_TPC_6_1:
4394 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4395 		break;
4396 
4397 	case GAUDI_QUEUE_ID_TPC_6_2:
4398 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4399 		break;
4400 
4401 	case GAUDI_QUEUE_ID_TPC_6_3:
4402 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4403 		break;
4404 
4405 	case GAUDI_QUEUE_ID_TPC_7_0:
4406 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4407 		break;
4408 
4409 	case GAUDI_QUEUE_ID_TPC_7_1:
4410 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4411 		break;
4412 
4413 	case GAUDI_QUEUE_ID_TPC_7_2:
4414 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4415 		break;
4416 
4417 	case GAUDI_QUEUE_ID_TPC_7_3:
4418 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4419 		break;
4420 
4421 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4422 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4423 			invalid_queue = true;
4424 
4425 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4426 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4427 		break;
4428 
4429 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4430 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4431 			invalid_queue = true;
4432 
4433 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4434 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4435 		break;
4436 
4437 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4438 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4439 			invalid_queue = true;
4440 
4441 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4442 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4443 		break;
4444 
4445 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4446 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4447 			invalid_queue = true;
4448 
4449 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4450 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4451 		break;
4452 
4453 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4454 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4455 			invalid_queue = true;
4456 
4457 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4458 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4459 		break;
4460 
4461 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4462 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4463 			invalid_queue = true;
4464 
4465 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4466 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4467 		break;
4468 
4469 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4470 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4471 			invalid_queue = true;
4472 
4473 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4474 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4475 		break;
4476 
4477 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4478 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4479 			invalid_queue = true;
4480 
4481 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4482 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4483 		break;
4484 
4485 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4486 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4487 			invalid_queue = true;
4488 
4489 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4490 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4491 		break;
4492 
4493 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4494 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4495 			invalid_queue = true;
4496 
4497 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4498 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4499 		break;
4500 
4501 	default:
4502 		invalid_queue = true;
4503 	}
4504 
4505 	if (invalid_queue) {
4506 		/* Should never get here */
4507 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4508 			hw_queue_id);
4509 		return;
4510 	}
4511 
4512 	db_value = pi;
4513 
4514 	/* ring the doorbell */
4515 	WREG32(db_reg_offset, db_value);
4516 
4517 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4518 		/* make sure device CPU will read latest data from host */
4519 		mb();
4520 
4521 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4522 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4523 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4524 
4525 		WREG32(irq_handler_offset,
4526 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4527 	}
4528 }
4529 
4530 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4531 				struct hl_bd *bd)
4532 {
4533 	__le64 *pbd = (__le64 *) bd;
4534 
4535 	/* The QMANs are in host memory, so a simple copy suffices */
4536 	pqe[0] = pbd[0];
4537 	pqe[1] = pbd[1];
4538 }
4539 
4540 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4541 					dma_addr_t *dma_handle, gfp_t flags)
4542 {
4543 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4544 						dma_handle, flags);
4545 
4546 	/* Shift to the device's base physical address of host memory */
4547 	if (kernel_addr)
4548 		*dma_handle += HOST_PHYS_BASE;
4549 
4550 	return kernel_addr;
4551 }
4552 
4553 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4554 		void *cpu_addr, dma_addr_t dma_handle)
4555 {
4556 	/* Cancel the device's base physical address of host memory */
4557 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4558 
4559 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4560 }
4561 
4562 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4563 {
4564 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4565 	u64 cur_addr = prop->dram_user_base_address;
4566 	u32 chunk_size, busy;
4567 	int rc, dma_id;
4568 
4569 	while (cur_addr < prop->dram_end_address) {
4570 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4571 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4572 
4573 			chunk_size =
4574 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4575 
4576 			dev_dbg(hdev->dev,
4577 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4578 				cur_addr, cur_addr + chunk_size);
4579 
4580 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
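			/*
			 * In memory-set mode the DMA core treats SRC_BASE as
			 * the fill value rather than a source address
			 */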
4581 					lower_32_bits(val));
4582 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4583 					upper_32_bits(val));
4584 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4585 						lower_32_bits(cur_addr));
4586 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4587 						upper_32_bits(cur_addr));
4588 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4589 					chunk_size);
4590 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4591 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4592 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4593 
4594 			cur_addr += chunk_size;
4595 
4596 			if (cur_addr == prop->dram_end_address)
4597 				break;
4598 		}
4599 
4600 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4601 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4602 
4603 			rc = hl_poll_timeout(
4604 				hdev,
4605 				mmDMA0_CORE_STS0 + dma_offset,
4606 				busy,
4607 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4608 				1000,
4609 				HBM_SCRUBBING_TIMEOUT_US);
4610 
4611 			if (rc) {
4612 				dev_err(hdev->dev,
4613 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4614 					dma_id);
4615 				return -EIO;
4616 			}
4617 		}
4618 	}
4619 
4620 	return 0;
4621 }
4622 
4623 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4624 {
4625 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4626 	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4627 	u64 addr, size, val = hdev->memory_scrub_val;
4628 	ktime_t timeout;
4629 	int rc = 0;
4630 
4631 	if (!hdev->memory_scrub)
4632 		return 0;
4633 
4634 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4635 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4636 		if (ktime_compare(ktime_get(), timeout) > 0) {
4637 			dev_err(hdev->dev, "waiting for idle timeout\n");
4638 			return -ETIMEDOUT;
4639 		}
4640 		usleep_range((1000 >> 2) + 1, 1000);
4641 	}
4642 
4643 	/* Scrub SRAM */
4644 	addr = prop->sram_user_base_address;
4645 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4646 
4647 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4648 			addr, addr + size, val);
4649 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4650 	if (rc) {
4651 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4652 		return rc;
4653 	}
4654 
4655 	/* Scrub HBM using all DMA channels in parallel */
4656 	rc = gaudi_scrub_device_dram(hdev, val);
4657 	if (rc) {
4658 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4659 		return rc;
4660 	}
4661 
4662 	return 0;
4663 }
4664 
4665 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4666 				u32 queue_id, dma_addr_t *dma_handle,
4667 				u16 *queue_len)
4668 {
4669 	struct gaudi_device *gaudi = hdev->asic_specific;
4670 	struct gaudi_internal_qman_info *q;
4671 
4672 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4673 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4674 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4675 		return NULL;
4676 	}
4677 
4678 	q = &gaudi->internal_qmans[queue_id];
4679 	*dma_handle = q->pq_dma_addr;
4680 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4681 
4682 	return q->pq_kernel_addr;
4683 }
4684 
4685 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4686 				u16 len, u32 timeout, u64 *result)
4687 {
4688 	struct gaudi_device *gaudi = hdev->asic_specific;
4689 
4690 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4691 		if (result)
4692 			*result = 0;
4693 		return 0;
4694 	}
4695 
4696 	if (!timeout)
4697 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4698 
4699 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4700 						timeout, result);
4701 }
4702 
4703 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4704 {
4705 	struct packet_msg_prot *fence_pkt;
4706 	dma_addr_t pkt_dma_addr;
4707 	u32 fence_val, tmp, timeout_usec;
4708 	dma_addr_t fence_dma_addr;
4709 	u32 *fence_ptr;
4710 	int rc;
4711 
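	/*
	 * Test the queue by sending a MSG_PROT packet that writes a fence
	 * value to host memory, then poll until the value arrives
	 */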
4712 	if (hdev->pldm)
4713 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4714 	else
4715 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4716 
4717 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4718 
4719 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4720 	if (!fence_ptr) {
4721 		dev_err(hdev->dev,
4722 			"Failed to allocate memory for H/W queue %d testing\n",
4723 			hw_queue_id);
4724 		return -ENOMEM;
4725 	}
4726 
4727 	*fence_ptr = 0;
4728 
4729 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4730 						&pkt_dma_addr);
4731 	if (!fence_pkt) {
4732 		dev_err(hdev->dev,
4733 			"Failed to allocate packet for H/W queue %d testing\n",
4734 			hw_queue_id);
4735 		rc = -ENOMEM;
4736 		goto free_fence_ptr;
4737 	}
4738 
4739 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4740 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4741 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4742 
4743 	fence_pkt->ctl = cpu_to_le32(tmp);
4744 	fence_pkt->value = cpu_to_le32(fence_val);
4745 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4746 
4747 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4748 					sizeof(struct packet_msg_prot),
4749 					pkt_dma_addr);
4750 	if (rc) {
4751 		dev_err(hdev->dev,
4752 			"Failed to send fence packet to H/W queue %d\n",
4753 			hw_queue_id);
4754 		goto free_pkt;
4755 	}
4756 
4757 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4758 					1000, timeout_usec, true);
4759 
4760 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4761 
4762 	if (rc == -ETIMEDOUT) {
4763 		dev_err(hdev->dev,
4764 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4765 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4766 		rc = -EIO;
4767 	}
4768 
4769 free_pkt:
4770 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4771 free_fence_ptr:
4772 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4773 	return rc;
4774 }
4775 
4776 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4777 {
4778 	struct gaudi_device *gaudi = hdev->asic_specific;
4779 
4780 	/*
4781 	 * Check the capability here because send_cpu_message() won't update
4782 	 * the result value if the CPU queue capability is not set
4783 	 */
4784 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4785 		return 0;
4786 
4787 	return hl_fw_test_cpu_queue(hdev);
4788 }
4789 
4790 static int gaudi_test_queues(struct hl_device *hdev)
4791 {
4792 	int i, rc, ret_val = 0;
4793 
4794 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4795 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4796 			rc = gaudi_test_queue(hdev, i);
4797 			if (rc)
4798 				ret_val = -EINVAL;
4799 		}
4800 	}
4801 
4802 	rc = gaudi_test_cpu_queue(hdev);
4803 	if (rc)
4804 		ret_val = -EINVAL;
4805 
4806 	return ret_val;
4807 }
4808 
4809 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4810 		gfp_t mem_flags, dma_addr_t *dma_handle)
4811 {
4812 	void *kernel_addr;
4813 
4814 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4815 		return NULL;
4816 
4817 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4818 
4819 	/* Shift to the device's base physical address of host memory */
4820 	if (kernel_addr)
4821 		*dma_handle += HOST_PHYS_BASE;
4822 
4823 	return kernel_addr;
4824 }
4825 
4826 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4827 			dma_addr_t dma_addr)
4828 {
4829 	/* Subtract the device's base physical address of host memory */
4830 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4831 
4832 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4833 }
4834 
4835 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4836 					size_t size, dma_addr_t *dma_handle)
4837 {
4838 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4839 }
4840 
4841 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4842 						size_t size, void *vaddr)
4843 {
4844 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4845 }
4846 
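/*
 * Calculate how many bytes of LIN_DMA packets are needed to cover an SG
 * table. DMA-contiguous entries are merged as long as the combined size
 * does not exceed DMA_MAX_TRANSFER_SIZE.
 */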
4847 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4848 {
4849 	struct scatterlist *sg, *sg_next_iter;
4850 	u32 count, dma_desc_cnt;
4851 	u64 len, len_next;
4852 	dma_addr_t addr, addr_next;
4853 
4854 	dma_desc_cnt = 0;
4855 
4856 	for_each_sgtable_dma_sg(sgt, sg, count) {
4857 		len = sg_dma_len(sg);
4858 		addr = sg_dma_address(sg);
4859 
4860 		if (len == 0)
4861 			break;
4862 
4863 		while ((count + 1) < sgt->nents) {
4864 			sg_next_iter = sg_next(sg);
4865 			len_next = sg_dma_len(sg_next_iter);
4866 			addr_next = sg_dma_address(sg_next_iter);
4867 
4868 			if (len_next == 0)
4869 				break;
4870 
4871 			if ((addr + len == addr_next) &&
4872 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4873 				len += len_next;
4874 				count++;
4875 				sg = sg_next_iter;
4876 			} else {
4877 				break;
4878 			}
4879 		}
4880 
4881 		dma_desc_cnt++;
4882 	}
4883 
4884 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4885 }
4886 
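/*
 * Pin the host memory referenced by a user LIN_DMA packet (unless it is
 * already pinned for this job), DMA-map it, add it to the job's userptr
 * list and account for the packets the patched CB will need for it.
 */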
4887 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4888 				struct hl_cs_parser *parser,
4889 				struct packet_lin_dma *user_dma_pkt,
4890 				u64 addr, enum dma_data_direction dir)
4891 {
4892 	struct hl_userptr *userptr;
4893 	int rc;
4894 
4895 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4896 			parser->job_userptr_list, &userptr))
4897 		goto already_pinned;
4898 
4899 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4900 	if (!userptr)
4901 		return -ENOMEM;
4902 
4903 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4904 				userptr);
4905 	if (rc)
4906 		goto free_userptr;
4907 
4908 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4909 
4910 	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4911 	if (rc) {
4912 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4913 		goto unpin_memory;
4914 	}
4915 
4916 	userptr->dma_mapped = true;
4917 	userptr->dir = dir;
4918 
4919 already_pinned:
4920 	parser->patched_cb_size +=
4921 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4922 
4923 	return 0;
4924 
4925 unpin_memory:
4926 	list_del(&userptr->job_node);
4927 	hl_unpin_host_memory(hdev, userptr);
4928 free_userptr:
4929 	kfree(userptr);
4930 	return rc;
4931 }
4932 
4933 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4934 				struct hl_cs_parser *parser,
4935 				struct packet_lin_dma *user_dma_pkt,
4936 				bool src_in_host)
4937 {
4938 	enum dma_data_direction dir;
4939 	bool skip_host_mem_pin = false, user_memset;
4940 	u64 addr;
4941 	int rc = 0;
4942 
4943 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4944 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4945 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4946 
4947 	if (src_in_host) {
4948 		if (user_memset)
4949 			skip_host_mem_pin = true;
4950 
4951 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4952 		dir = DMA_TO_DEVICE;
4953 		addr = le64_to_cpu(user_dma_pkt->src_addr);
4954 	} else {
4955 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4956 		dir = DMA_FROM_DEVICE;
4957 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4958 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4959 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4960 	}
4961 
4962 	if (skip_host_mem_pin)
4963 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4964 	else
4965 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4966 						addr, dir);
4967 
4968 	return rc;
4969 }
4970 
4971 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4972 				struct hl_cs_parser *parser,
4973 				struct packet_lin_dma *user_dma_pkt)
4974 {
4975 	bool src_in_host = false;
4976 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4977 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4978 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4979 
4980 	dev_dbg(hdev->dev, "DMA packet details:\n");
4981 	dev_dbg(hdev->dev, "source == 0x%llx\n",
4982 				le64_to_cpu(user_dma_pkt->src_addr));
4983 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4984 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4985 
4986 	/*
4987 	 * Special handling for DMA with size 0. Bypass all validations
4988 	 * because no transactions will be done except for WR_COMP, which
4989 	 * is not a security issue
4990 	 */
4991 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
4992 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4993 		return 0;
4994 	}
4995 
4996 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4997 		src_in_host = true;
4998 
4999 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5000 						src_in_host);
5001 }
5002 
5003 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5004 					struct hl_cs_parser *parser,
5005 					struct packet_load_and_exe *user_pkt)
5006 {
5007 	u32 cfg;
5008 
5009 	cfg = le32_to_cpu(user_pkt->cfg);
5010 
5011 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5012 		dev_err(hdev->dev,
5013 			"User not allowed to use Load and Execute\n");
5014 		return -EPERM;
5015 	}
5016 
5017 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5018 
5019 	return 0;
5020 }
5021 
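/*
 * Walk the user CB packet by packet, reject packets the user is not
 * allowed to submit and accumulate the size of the patched CB that will
 * be built from it.
 */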
5022 static int gaudi_validate_cb(struct hl_device *hdev,
5023 			struct hl_cs_parser *parser, bool is_mmu)
5024 {
5025 	u32 cb_parsed_length = 0;
5026 	int rc = 0;
5027 
5028 	parser->patched_cb_size = 0;
5029 
5030 	/* user_cb_size is more than 0 so the loop will always be executed */
5031 	while (cb_parsed_length < parser->user_cb_size) {
5032 		enum packet_id pkt_id;
5033 		u16 pkt_size;
5034 		struct gaudi_packet *user_pkt;
5035 
5036 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5037 
5038 		pkt_id = (enum packet_id) (
5039 				(le64_to_cpu(user_pkt->header) &
5040 				PACKET_HEADER_PACKET_ID_MASK) >>
5041 					PACKET_HEADER_PACKET_ID_SHIFT);
5042 
5043 		if (!validate_packet_id(pkt_id)) {
5044 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5045 			rc = -EINVAL;
5046 			break;
5047 		}
5048 
5049 		pkt_size = gaudi_packet_sizes[pkt_id];
5050 		cb_parsed_length += pkt_size;
5051 		if (cb_parsed_length > parser->user_cb_size) {
5052 			dev_err(hdev->dev,
5053 				"packet 0x%x is out of CB boundary\n", pkt_id);
5054 			rc = -EINVAL;
5055 			break;
5056 		}
5057 
5058 		switch (pkt_id) {
5059 		case PACKET_MSG_PROT:
5060 			dev_err(hdev->dev,
5061 				"User not allowed to use MSG_PROT\n");
5062 			rc = -EPERM;
5063 			break;
5064 
5065 		case PACKET_CP_DMA:
5066 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5067 			rc = -EPERM;
5068 			break;
5069 
5070 		case PACKET_STOP:
5071 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5072 			rc = -EPERM;
5073 			break;
5074 
5075 		case PACKET_WREG_BULK:
5076 			dev_err(hdev->dev,
5077 				"User not allowed to use WREG_BULK\n");
5078 			rc = -EPERM;
5079 			break;
5080 
5081 		case PACKET_LOAD_AND_EXE:
5082 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5083 				(struct packet_load_and_exe *) user_pkt);
5084 			break;
5085 
5086 		case PACKET_LIN_DMA:
5087 			parser->contains_dma_pkt = true;
5088 			if (is_mmu)
5089 				parser->patched_cb_size += pkt_size;
5090 			else
5091 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5092 					(struct packet_lin_dma *) user_pkt);
5093 			break;
5094 
5095 		case PACKET_WREG_32:
5096 		case PACKET_MSG_LONG:
5097 		case PACKET_MSG_SHORT:
5098 		case PACKET_REPEAT:
5099 		case PACKET_FENCE:
5100 		case PACKET_NOP:
5101 		case PACKET_ARB_POINT:
5102 			parser->patched_cb_size += pkt_size;
5103 			break;
5104 
5105 		default:
5106 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5107 				pkt_id);
5108 			rc = -EINVAL;
5109 			break;
5110 		}
5111 
5112 		if (rc)
5113 			break;
5114 	}
5115 
5116 	/*
5117 	 * The new CB should have space at the end for:
5118 	 * 1. Optional NOP padding for cacheline alignment
5119 	 * 2. A MSG_PROT packet that acts as the completion packet
5120 	 * 3. A MSG_PROT packet that generates the MSI interrupt
5121 	 */
5122 	if (parser->completion)
5123 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5124 			parser->patched_cb_size);
5125 
5126 	return rc;
5127 }
5128 
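/*
 * Expand a single user LIN_DMA packet that targets host memory into one
 * LIN_DMA packet per (merged) SG entry of the pinned user buffer. The
 * user's WR_COMP setting is restored only on the last generated packet.
 */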
5129 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5130 				struct hl_cs_parser *parser,
5131 				struct packet_lin_dma *user_dma_pkt,
5132 				struct packet_lin_dma *new_dma_pkt,
5133 				u32 *new_dma_pkt_size)
5134 {
5135 	struct hl_userptr *userptr;
5136 	struct scatterlist *sg, *sg_next_iter;
5137 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5138 	u64 len, len_next;
5139 	dma_addr_t dma_addr, dma_addr_next;
5140 	u64 device_memory_addr, addr;
5141 	enum dma_data_direction dir;
5142 	struct sg_table *sgt;
5143 	bool src_in_host = false;
5144 	bool skip_host_mem_pin = false;
5145 	bool user_memset;
5146 
5147 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5148 
5149 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5150 		src_in_host = true;
5151 
5152 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5153 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5154 
5155 	if (src_in_host) {
5156 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5157 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5158 		dir = DMA_TO_DEVICE;
5159 		if (user_memset)
5160 			skip_host_mem_pin = true;
5161 	} else {
5162 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5163 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5164 		dir = DMA_FROM_DEVICE;
5165 	}
5166 
5167 	if ((!skip_host_mem_pin) &&
5168 		(!hl_userptr_is_pinned(hdev, addr,
5169 					le32_to_cpu(user_dma_pkt->tsize),
5170 					parser->job_userptr_list, &userptr))) {
5171 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5172 				addr, le32_to_cpu(user_dma_pkt->tsize));
5173 		return -EFAULT;
5174 	}
5175 
5176 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5177 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5178 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5179 		return 0;
5180 	}
5181 
5182 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5183 
5184 	sgt = userptr->sgt;
5185 	dma_desc_cnt = 0;
5186 
5187 	for_each_sgtable_dma_sg(sgt, sg, count) {
5188 		len = sg_dma_len(sg);
5189 		dma_addr = sg_dma_address(sg);
5190 
5191 		if (len == 0)
5192 			break;
5193 
5194 		while ((count + 1) < sgt->nents) {
5195 			sg_next_iter = sg_next(sg);
5196 			len_next = sg_dma_len(sg_next_iter);
5197 			dma_addr_next = sg_dma_address(sg_next_iter);
5198 
5199 			if (len_next == 0)
5200 				break;
5201 
5202 			if ((dma_addr + len == dma_addr_next) &&
5203 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5204 				len += len_next;
5205 				count++;
5206 				sg = sg_next_iter;
5207 			} else {
5208 				break;
5209 			}
5210 		}
5211 
5212 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5213 		if (likely(dma_desc_cnt))
5214 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5215 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5216 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5217 		new_dma_pkt->tsize = cpu_to_le32(len);
5218 
5219 		if (dir == DMA_TO_DEVICE) {
5220 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5221 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5222 		} else {
5223 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5224 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5225 		}
5226 
5227 		if (!user_memset)
5228 			device_memory_addr += len;
5229 		dma_desc_cnt++;
5230 		new_dma_pkt++;
5231 	}
5232 
5233 	if (!dma_desc_cnt) {
5234 		dev_err(hdev->dev,
5235 			"No SG entries found when patching DMA packet\n");
5236 		return -EFAULT;
5237 	}
5238 
5239 	/* Fix the last dma packet - wrcomp must be as user set it */
5240 	new_dma_pkt--;
5241 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5242 
5243 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5244 
5245 	return 0;
5246 }
5247 
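/*
 * Copy the user CB into the patched CB, replacing every LIN_DMA packet
 * with its patched equivalent and copying all other allowed packets as-is.
 */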
5248 static int gaudi_patch_cb(struct hl_device *hdev,
5249 				struct hl_cs_parser *parser)
5250 {
5251 	u32 cb_parsed_length = 0;
5252 	u32 cb_patched_cur_length = 0;
5253 	int rc = 0;
5254 
5255 	/* user_cb_size is more than 0 so the loop will always be executed */
5256 	while (cb_parsed_length < parser->user_cb_size) {
5257 		enum packet_id pkt_id;
5258 		u16 pkt_size;
5259 		u32 new_pkt_size = 0;
5260 		struct gaudi_packet *user_pkt, *kernel_pkt;
5261 
5262 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5263 		kernel_pkt = parser->patched_cb->kernel_address +
5264 					cb_patched_cur_length;
5265 
5266 		pkt_id = (enum packet_id) (
5267 				(le64_to_cpu(user_pkt->header) &
5268 				PACKET_HEADER_PACKET_ID_MASK) >>
5269 					PACKET_HEADER_PACKET_ID_SHIFT);
5270 
5271 		if (!validate_packet_id(pkt_id)) {
5272 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5273 			rc = -EINVAL;
5274 			break;
5275 		}
5276 
5277 		pkt_size = gaudi_packet_sizes[pkt_id];
5278 		cb_parsed_length += pkt_size;
5279 		if (cb_parsed_length > parser->user_cb_size) {
5280 			dev_err(hdev->dev,
5281 				"packet 0x%x is out of CB boundary\n", pkt_id);
5282 			rc = -EINVAL;
5283 			break;
5284 		}
5285 
5286 		switch (pkt_id) {
5287 		case PACKET_LIN_DMA:
5288 			rc = gaudi_patch_dma_packet(hdev, parser,
5289 					(struct packet_lin_dma *) user_pkt,
5290 					(struct packet_lin_dma *) kernel_pkt,
5291 					&new_pkt_size);
5292 			cb_patched_cur_length += new_pkt_size;
5293 			break;
5294 
5295 		case PACKET_MSG_PROT:
5296 			dev_err(hdev->dev,
5297 				"User not allowed to use MSG_PROT\n");
5298 			rc = -EPERM;
5299 			break;
5300 
5301 		case PACKET_CP_DMA:
5302 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5303 			rc = -EPERM;
5304 			break;
5305 
5306 		case PACKET_STOP:
5307 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5308 			rc = -EPERM;
5309 			break;
5310 
5311 		case PACKET_WREG_32:
5312 		case PACKET_WREG_BULK:
5313 		case PACKET_MSG_LONG:
5314 		case PACKET_MSG_SHORT:
5315 		case PACKET_REPEAT:
5316 		case PACKET_FENCE:
5317 		case PACKET_NOP:
5318 		case PACKET_ARB_POINT:
5319 		case PACKET_LOAD_AND_EXE:
5320 			memcpy(kernel_pkt, user_pkt, pkt_size);
5321 			cb_patched_cur_length += pkt_size;
5322 			break;
5323 
5324 		default:
5325 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5326 				pkt_id);
5327 			rc = -EINVAL;
5328 			break;
5329 		}
5330 
5331 		if (rc)
5332 			break;
5333 	}
5334 
5335 	return rc;
5336 }
5337 
5338 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5339 		struct hl_cs_parser *parser)
5340 {
5341 	u64 handle;
5342 	u32 patched_cb_size;
5343 	struct hl_cb *user_cb;
5344 	int rc;
5345 
5346 	/*
5347 	 * The new CB should have space at the end for:
5348 	 * 1. Optional NOP padding for cacheline alignment
5349 	 * 2. A MSG_PROT packet that acts as the completion packet
5350 	 * 3. A MSG_PROT packet that generates the MSI interrupt
5351 	 */
5352 	if (parser->completion)
5353 		parser->patched_cb_size = parser->user_cb_size +
5354 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5355 	else
5356 		parser->patched_cb_size = parser->user_cb_size;
5357 
5358 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5359 				parser->patched_cb_size, false, false,
5360 				&handle);
5361 
5362 	if (rc) {
5363 		dev_err(hdev->dev,
5364 			"Failed to allocate patched CB for DMA CS %d\n",
5365 			rc);
5366 		return rc;
5367 	}
5368 
5369 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5370 	/* hl_cb_get should never fail */
5371 	if (!parser->patched_cb) {
5372 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5373 		rc = -EFAULT;
5374 		goto out;
5375 	}
5376 
5377 	/*
5378 	 * We are protected from overflow because the check
5379 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5380 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5381 	 *
5382 	 * There is no option to reach here without going through that check because:
5383 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5384 	 *    an external queue.
5385 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5386 	 */
5387 	memcpy(parser->patched_cb->kernel_address,
5388 		parser->user_cb->kernel_address,
5389 		parser->user_cb_size);
5390 
5391 	patched_cb_size = parser->patched_cb_size;
5392 
5393 	/* Validate patched CB instead of user CB */
5394 	user_cb = parser->user_cb;
5395 	parser->user_cb = parser->patched_cb;
5396 	rc = gaudi_validate_cb(hdev, parser, true);
5397 	parser->user_cb = user_cb;
5398 
5399 	if (rc) {
5400 		hl_cb_put(parser->patched_cb);
5401 		goto out;
5402 	}
5403 
5404 	if (patched_cb_size != parser->patched_cb_size) {
5405 		dev_err(hdev->dev, "user CB size mismatch\n");
5406 		hl_cb_put(parser->patched_cb);
5407 		rc = -EINVAL;
5408 		goto out;
5409 	}
5410 
5411 out:
5412 	/*
5413 	 * Always call cb_destroy here because we still hold one reference
5414 	 * to the CB from the earlier cb_get. After the job is completed,
5415 	 * cb_put will release it, but here we want to remove it from the
5416 	 * idr
5417 	 */
5418 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5419 
5420 	return rc;
5421 }
5422 
5423 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5424 		struct hl_cs_parser *parser)
5425 {
5426 	u64 handle;
5427 	int rc;
5428 
5429 	rc = gaudi_validate_cb(hdev, parser, false);
5430 
5431 	if (rc)
5432 		goto free_userptr;
5433 
5434 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5435 				parser->patched_cb_size, false, false,
5436 				&handle);
5437 	if (rc) {
5438 		dev_err(hdev->dev,
5439 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5440 		goto free_userptr;
5441 	}
5442 
5443 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5444 	/* hl_cb_get should never fail here */
5445 	if (!parser->patched_cb) {
5446 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5447 		rc = -EFAULT;
5448 		goto out;
5449 	}
5450 
5451 	rc = gaudi_patch_cb(hdev, parser);
5452 
5453 	if (rc)
5454 		hl_cb_put(parser->patched_cb);
5455 
5456 out:
5457 	/*
5458 	 * Always call cb_destroy here because we still hold one reference
5459 	 * to the CB from the earlier cb_get. After the job is completed,
5460 	 * cb_put will release it, but here we want to remove it from the
5461 	 * idr
5462 	 */
5463 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5464 
5465 free_userptr:
5466 	if (rc)
5467 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5468 	return rc;
5469 }
5470 
5471 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5472 					struct hl_cs_parser *parser)
5473 {
5474 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5475 	struct gaudi_device *gaudi = hdev->asic_specific;
5476 	u32 nic_queue_offset, nic_mask_q_id;
5477 
5478 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5479 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5480 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5481 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5482 
5483 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5484 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5485 			return -EINVAL;
5486 		}
5487 	}
5488 
5489 	/* For internal queue jobs just check if CB address is valid */
5490 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5491 					parser->user_cb_size,
5492 					asic_prop->sram_user_base_address,
5493 					asic_prop->sram_end_address))
5494 		return 0;
5495 
5496 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5497 					parser->user_cb_size,
5498 					asic_prop->dram_user_base_address,
5499 					asic_prop->dram_end_address))
5500 		return 0;
5501 
5502 	/* PMMU and HPMMU addresses are equal, check only one of them */
5503 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5504 					parser->user_cb_size,
5505 					asic_prop->pmmu.start_addr,
5506 					asic_prop->pmmu.end_addr))
5507 		return 0;
5508 
5509 	dev_err(hdev->dev,
5510 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5511 		parser->user_cb, parser->user_cb_size);
5512 
5513 	return -EFAULT;
5514 }
5515 
5516 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5517 {
5518 	struct gaudi_device *gaudi = hdev->asic_specific;
5519 
5520 	if (parser->queue_type == QUEUE_TYPE_INT)
5521 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5522 
5523 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5524 		return gaudi_parse_cb_mmu(hdev, parser);
5525 	else
5526 		return gaudi_parse_cb_no_mmu(hdev, parser);
5527 }
5528 
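/*
 * Append the end-of-CB packets: NOP padding from the end of the original
 * CB content up to the last two entries, then a MSG_PROT packet that
 * writes the completion value to the CQ and a MSG_PROT packet that
 * triggers the MSI interrupt.
 */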
5529 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5530 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5531 				u32 msi_vec, bool eb)
5532 {
5533 	struct packet_msg_prot *cq_pkt;
5534 	struct packet_nop *cq_padding;
5535 	u64 msi_addr;
5536 	u32 tmp;
5537 
5538 	cq_padding = kernel_address + original_len;
5539 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5540 
5541 	while ((void *)cq_padding < (void *)cq_pkt) {
5542 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5543 		cq_padding++;
5544 	}
5545 
5546 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5547 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5548 
5549 	if (eb)
5550 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5551 
5552 	cq_pkt->ctl = cpu_to_le32(tmp);
5553 	cq_pkt->value = cpu_to_le32(cq_val);
5554 	cq_pkt->addr = cpu_to_le64(cq_addr);
5555 
5556 	cq_pkt++;
5557 
5558 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5559 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5560 	cq_pkt->ctl = cpu_to_le32(tmp);
5561 	cq_pkt->value = cpu_to_le32(1);
5562 	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5563 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5564 }
5565 
5566 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5567 {
5568 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5569 }
5570 
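/*
 * Fill a device memory range with a 64-bit value by building a single
 * memset LIN_DMA packet in a kernel CB and running it on QMAN0
 * (DMA channel 0).
 */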
5571 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5572 					u32 size, u64 val)
5573 {
5574 	struct packet_lin_dma *lin_dma_pkt;
5575 	struct hl_cs_job *job;
5576 	u32 cb_size, ctl, err_cause;
5577 	struct hl_cb *cb;
5578 	int rc;
5579 
5580 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5581 	if (!cb)
5582 		return -EFAULT;
5583 
5584 	lin_dma_pkt = cb->kernel_address;
5585 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5586 	cb_size = sizeof(*lin_dma_pkt);
5587 
5588 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5589 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5590 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5591 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5592 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5593 
5594 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5595 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5596 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5597 	lin_dma_pkt->tsize = cpu_to_le32(size);
5598 
5599 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5600 	if (!job) {
5601 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5602 		rc = -ENOMEM;
5603 		goto release_cb;
5604 	}
5605 
5606 	/* Verify DMA is OK */
5607 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5608 	if (err_cause && !hdev->init_done) {
5609 		dev_dbg(hdev->dev,
5610 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5611 			err_cause);
5612 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5613 	}
5614 
5615 	job->id = 0;
5616 	job->user_cb = cb;
5617 	atomic_inc(&job->user_cb->cs_cnt);
5618 	job->user_cb_size = cb_size;
5619 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5620 	job->patched_cb = job->user_cb;
5621 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5622 
5623 	hl_debugfs_add_job(hdev, job);
5624 
5625 	rc = gaudi_send_job_on_qman0(hdev, job);
5626 	hl_debugfs_remove_job(hdev, job);
5627 	kfree(job);
5628 	atomic_dec(&cb->cs_cnt);
5629 
5630 	/* Verify DMA is OK */
5631 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5632 	if (err_cause) {
5633 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5634 		rc = -EIO;
5635 		if (!hdev->init_done) {
5636 			dev_dbg(hdev->dev,
5637 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5638 				err_cause);
5639 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5640 		}
5641 	}
5642 
5643 release_cb:
5644 	hl_cb_put(cb);
5645 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5646 
5647 	return rc;
5648 }
5649 
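/*
 * Write the same value to a consecutive range of registers by building a
 * kernel CB of MSG_LONG packets (one per register) and running it on QMAN0.
 */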
5650 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5651 					u32 num_regs, u32 val)
5652 {
5653 	struct packet_msg_long *pkt;
5654 	struct hl_cs_job *job;
5655 	u32 cb_size, ctl;
5656 	struct hl_cb *cb;
5657 	int i, rc;
5658 
5659 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5660 
5661 	if (cb_size > SZ_2M) {
5662 		dev_err(hdev->dev, "CB size must be smaller than %uMB\n", SZ_2M >> 20);
5663 		return -ENOMEM;
5664 	}
5665 
5666 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5667 	if (!cb)
5668 		return -EFAULT;
5669 
5670 	pkt = cb->kernel_address;
5671 
5672 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5673 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5674 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5675 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5676 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5677 
5678 	for (i = 0; i < num_regs ; i++, pkt++) {
5679 		pkt->ctl = cpu_to_le32(ctl);
5680 		pkt->value = cpu_to_le32(val);
5681 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5682 	}
5683 
5684 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5685 	if (!job) {
5686 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5687 		rc = -ENOMEM;
5688 		goto release_cb;
5689 	}
5690 
5691 	job->id = 0;
5692 	job->user_cb = cb;
5693 	atomic_inc(&job->user_cb->cs_cnt);
5694 	job->user_cb_size = cb_size;
5695 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5696 	job->patched_cb = job->user_cb;
5697 	job->job_cb_size = cb_size;
5698 
5699 	hl_debugfs_add_job(hdev, job);
5700 
5701 	rc = gaudi_send_job_on_qman0(hdev, job);
5702 	hl_debugfs_remove_job(hdev, job);
5703 	kfree(job);
5704 	atomic_dec(&cb->cs_cnt);
5705 
5706 release_cb:
5707 	hl_cb_put(cb);
5708 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5709 
5710 	return rc;
5711 }
5712 
5713 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5714 {
5715 	u64 base_addr;
5716 	u32 num_regs;
5717 	int rc;
5718 
5719 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5720 	num_regs = NUM_OF_SOB_IN_BLOCK;
5721 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5722 	if (rc) {
5723 		dev_err(hdev->dev, "failed resetting SM registers");
5724 		return -ENOMEM;
5725 	}
5726 
5727 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5728 	num_regs = NUM_OF_SOB_IN_BLOCK;
5729 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5730 	if (rc) {
5731 		dev_err(hdev->dev, "failed resetting SM registers");
5732 		return -ENOMEM;
5733 	}
5734 
5735 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5736 	num_regs = NUM_OF_SOB_IN_BLOCK;
5737 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5738 	if (rc) {
5739 		dev_err(hdev->dev, "failed resetting SM registers");
5740 		return -ENOMEM;
5741 	}
5742 
5743 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5744 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5745 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5746 	if (rc) {
5747 		dev_err(hdev->dev, "failed resetting SM registers");
5748 		return -ENOMEM;
5749 	}
5750 
5751 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5752 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5753 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5754 	if (rc) {
5755 		dev_err(hdev->dev, "failed resetting SM registers");
5756 		return -ENOMEM;
5757 	}
5758 
5759 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5760 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5761 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5762 	if (rc) {
5763 		dev_err(hdev->dev, "failed resetting SM registers");
5764 		return -ENOMEM;
5765 	}
5766 
5767 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5768 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5769 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5770 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5771 	if (rc) {
5772 		dev_err(hdev->dev, "failed resetting SM registers");
5773 		return -ENOMEM;
5774 	}
5775 
5776 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5777 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5778 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5779 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5780 	if (rc) {
5781 		dev_err(hdev->dev, "failed resetting SM registers");
5782 		return -ENOMEM;
5783 	}
5784 
5785 	return 0;
5786 }
5787 
5788 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5789 {
5790 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5791 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5792 	int i;
5793 
5794 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5795 		u64 sob_addr = CFG_BASE +
5796 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5797 				(i * sob_delta);
5798 		u32 dma_offset = i * DMA_CORE_OFFSET;
5799 
5800 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5801 				lower_32_bits(sob_addr));
5802 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5803 				upper_32_bits(sob_addr));
5804 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5805 
5806 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5807 		 * modified by the user for SRAM reduction
5808 		 */
5809 		if (i > 1)
5810 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5811 								0x00000001);
5812 	}
5813 }
5814 
5815 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5816 {
5817 	u32 qman_offset;
5818 	int i;
5819 
5820 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5821 		qman_offset = i * DMA_QMAN_OFFSET;
5822 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5823 	}
5824 
5825 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5826 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5827 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5828 	}
5829 
5830 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5831 		qman_offset = i * TPC_QMAN_OFFSET;
5832 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5833 	}
5834 
5835 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5836 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5837 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5838 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5839 	}
5840 }
5841 
5842 static int gaudi_restore_user_registers(struct hl_device *hdev)
5843 {
5844 	int rc;
5845 
5846 	rc = gaudi_restore_sm_registers(hdev);
5847 	if (rc)
5848 		return rc;
5849 
5850 	gaudi_restore_dma_registers(hdev);
5851 	gaudi_restore_qm_registers(hdev);
5852 
5853 	return 0;
5854 }
5855 
5856 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5857 {
5858 	return 0;
5859 }
5860 
5861 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5862 {
5863 	u32 size = hdev->asic_prop.mmu_pgt_size +
5864 			hdev->asic_prop.mmu_cache_mng_size;
5865 	struct gaudi_device *gaudi = hdev->asic_specific;
5866 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5867 
5868 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5869 		return 0;
5870 
5871 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5872 }
5873 
5874 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5875 {
5876 
5877 }
5878 
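/*
 * Program a single DMA core to copy size_to_dma bytes from the given
 * device address into the supplied DMA buffer, poll until the engine is
 * no longer busy and check the engine's error cause register.
 */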
5879 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5880 					u32 size_to_dma, dma_addr_t dma_addr)
5881 {
5882 	u32 err_cause, val;
5883 	u64 dma_offset;
5884 	int rc;
5885 
5886 	dma_offset = dma_id * DMA_CORE_OFFSET;
5887 
5888 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5889 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5890 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5891 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5892 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5893 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5894 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5895 
5896 	rc = hl_poll_timeout(
5897 		hdev,
5898 		mmDMA0_CORE_STS0 + dma_offset,
5899 		val,
5900 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5901 		0,
5902 		1000000);
5903 
5904 	if (rc) {
5905 		dev_err(hdev->dev,
5906 			"DMA %d timed-out during reading of 0x%llx\n",
5907 			dma_id, addr);
5908 		return -EIO;
5909 	}
5910 
5911 	/* Verify DMA is OK */
5912 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5913 	if (err_cause) {
5914 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5915 		dev_dbg(hdev->dev,
5916 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5917 			err_cause);
5918 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5919 
5920 		return -EIO;
5921 	}
5922 
5923 	return 0;
5924 }
5925 
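/*
 * Read a device memory range for debugfs by borrowing an idle PCI DMA
 * engine (DMA_1, falling back to DMA_2): the data is transferred in 2MB
 * chunks into a temporary coherent buffer and copied into blob_addr.
 */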
5926 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5927 				void *blob_addr)
5928 {
5929 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5930 	u32 qm_glbl_sts0, qm_cgm_sts;
5931 	u64 dma_offset, qm_offset;
5932 	dma_addr_t dma_addr;
5933 	void *kernel_addr;
5934 	bool is_eng_idle;
5935 	int rc = 0, dma_id;
5936 
5937 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5938 
5939 	if (!kernel_addr)
5940 		return -ENOMEM;
5941 
5942 	hdev->asic_funcs->hw_queues_lock(hdev);
5943 
5944 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5945 	dma_offset = dma_id * DMA_CORE_OFFSET;
5946 	qm_offset = dma_id * DMA_QMAN_OFFSET;
5947 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5948 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5949 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5950 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5951 		      IS_DMA_IDLE(dma_core_sts0);
5952 
5953 	if (!is_eng_idle) {
5954 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5955 		dma_offset = dma_id * DMA_CORE_OFFSET;
5956 		qm_offset = dma_id * DMA_QMAN_OFFSET;
5957 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5958 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5959 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5960 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5961 			      IS_DMA_IDLE(dma_core_sts0);
5962 
5963 		if (!is_eng_idle) {
5964 			dev_err_ratelimited(hdev->dev,
5965 				"Can't read via DMA because it is BUSY\n");
5966 			rc = -EAGAIN;
5967 			goto out;
5968 		}
5969 	}
5970 
5971 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5972 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5973 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5974 
5975 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5976 	 * using the compute ctx ASID, if one exists. If not, use the kernel
5977 	 * ctx ASID
5978 	 */
5979 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5980 
5981 	/* Verify DMA is OK */
5982 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5983 	if (err_cause) {
5984 		dev_dbg(hdev->dev,
5985 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5986 			err_cause);
5987 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5988 	}
5989 
5990 	pos = 0;
5991 	size_left = size;
5992 	size_to_dma = SZ_2M;
5993 
5994 	while (size_left > 0) {
5995 
5996 		if (size_left < SZ_2M)
5997 			size_to_dma = size_left;
5998 
5999 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
6000 						dma_addr);
6001 		if (rc)
6002 			break;
6003 
6004 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
6005 
6006 		if (size_left <= SZ_2M)
6007 			break;
6008 
6009 		pos += SZ_2M;
6010 		addr += SZ_2M;
6011 		size_left -= SZ_2M;
6012 	}
6013 
6014 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6015 	 * using the compute ctx ASID, if one exists. If not, use the kernel
6016 	 * ctx ASID
6017 	 */
6018 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6019 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6020 
6021 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6022 
6023 out:
6024 	hdev->asic_funcs->hw_queues_unlock(hdev);
6025 
6026 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6027 
6028 	return rc;
6029 }
6030 
6031 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6032 {
6033 	struct gaudi_device *gaudi = hdev->asic_specific;
6034 
6035 	if (hdev->reset_info.hard_reset_pending)
6036 		return U64_MAX;
6037 
6038 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6039 			(addr - gaudi->hbm_bar_cur_addr));
6040 }
6041 
6042 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6043 {
6044 	struct gaudi_device *gaudi = hdev->asic_specific;
6045 
6046 	if (hdev->reset_info.hard_reset_pending)
6047 		return;
6048 
6049 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6050 			(addr - gaudi->hbm_bar_cur_addr));
6051 }
6052 
6053 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6054 {
6055 	/* mask to zero the MMBP and ASID bits */
6056 	WREG32_AND(reg, ~0x7FF);
6057 	WREG32_OR(reg, asid);
6058 }
6059 
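/*
 * Program the given ASID into the non-secure properties and ARUSER/AWUSER
 * registers of the DMA, TPC, MME and enabled NIC engines, so their
 * transactions are translated by the MMU with that ASID.
 */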
6060 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6061 {
6062 	struct gaudi_device *gaudi = hdev->asic_specific;
6063 
6064 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6065 		return;
6066 
6067 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6068 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6069 		return;
6070 	}
6071 
6072 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6073 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6074 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6075 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6076 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6077 
6078 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6079 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6080 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6081 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6082 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6083 
6084 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6085 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6086 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6087 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6088 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6089 
6090 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6091 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6092 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6093 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6094 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6095 
6096 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6097 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6098 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6099 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6100 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6101 
6102 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6103 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6104 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6105 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6106 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6107 
6108 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6109 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6110 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6111 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6112 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6113 
6114 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6115 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6116 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6117 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6118 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6119 
6120 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6121 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6122 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6123 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6124 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6125 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6126 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6127 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6128 
6129 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6130 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6131 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6132 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6133 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6134 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6135 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6136 
6137 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6138 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6139 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6140 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6141 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6142 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6143 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6144 
6145 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6146 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6147 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6149 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6152 
6153 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6159 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6160 
6161 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6167 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6168 
6169 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6173 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6176 
6177 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6183 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6184 
6185 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6191 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6192 
6193 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6194 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6197 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6198 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6203 
6204 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6206 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6207 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6212 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6213 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6214 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6215 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6216 
6217 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6218 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6219 				asid);
6220 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6221 				asid);
6222 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6223 				asid);
6224 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6225 				asid);
6226 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6227 				asid);
6228 	}
6229 
6230 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6231 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6232 				asid);
6233 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6234 				asid);
6235 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6236 				asid);
6237 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6238 				asid);
6239 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6240 				asid);
6241 	}
6242 
6243 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6244 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6245 				asid);
6246 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6247 				asid);
6248 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6249 				asid);
6250 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6251 				asid);
6252 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6253 				asid);
6254 	}
6255 
6256 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6257 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6258 				asid);
6259 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6260 				asid);
6261 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6262 				asid);
6263 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6264 				asid);
6265 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6266 				asid);
6267 	}
6268 
6269 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6270 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6271 				asid);
6272 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6273 				asid);
6274 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6275 				asid);
6276 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6277 				asid);
6278 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6279 				asid);
6280 	}
6281 
6282 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6283 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6284 				asid);
6285 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6286 				asid);
6287 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6288 				asid);
6289 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6290 				asid);
6291 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6292 				asid);
6293 	}
6294 
6295 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6296 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6297 				asid);
6298 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6299 				asid);
6300 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6301 				asid);
6302 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6303 				asid);
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6305 				asid);
6306 	}
6307 
6308 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6309 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6310 				asid);
6311 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6312 				asid);
6313 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6314 				asid);
6315 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6316 				asid);
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6318 				asid);
6319 	}
6320 
6321 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6322 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6323 				asid);
6324 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6325 				asid);
6326 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6327 				asid);
6328 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6329 				asid);
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6331 				asid);
6332 	}
6333 
6334 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6335 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6336 				asid);
6337 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6338 				asid);
6339 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6340 				asid);
6341 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6342 				asid);
6343 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6344 				asid);
6345 	}
6346 
6347 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6348 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6349 }
6350 
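/*
 * Run a kernel-driver job on DMA channel 0's QMAN: a MSG_PROT fence packet
 * is written at the end of the patched CB, the CB is sent to the queue and
 * host memory is polled until the fence value is written back or the
 * timeout expires.
 */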
6351 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6352 		struct hl_cs_job *job)
6353 {
6354 	struct packet_msg_prot *fence_pkt;
6355 	u32 *fence_ptr;
6356 	dma_addr_t fence_dma_addr;
6357 	struct hl_cb *cb;
6358 	u32 tmp, timeout, dma_offset;
6359 	int rc;
6360 
6361 	if (hdev->pldm)
6362 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6363 	else
6364 		timeout = HL_DEVICE_TIMEOUT_USEC;
6365 
6366 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6367 	if (!fence_ptr) {
6368 		dev_err(hdev->dev,
6369 			"Failed to allocate fence memory for QMAN0\n");
6370 		return -ENOMEM;
6371 	}
6372 
6373 	cb = job->patched_cb;
6374 
6375 	fence_pkt = cb->kernel_address +
6376 			job->job_cb_size - sizeof(struct packet_msg_prot);
6377 
6378 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6379 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6380 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6381 
6382 	fence_pkt->ctl = cpu_to_le32(tmp);
6383 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6384 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6385 
6386 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6387 
6388 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6389 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6390 
6391 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6392 					job->job_cb_size, cb->bus_address);
6393 	if (rc) {
6394 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6395 		goto free_fence_ptr;
6396 	}
6397 
6398 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6399 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6400 				timeout, true);
6401 
6402 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6403 
6404 	if (rc == -ETIMEDOUT) {
6405 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6406 		goto free_fence_ptr;
6407 	}
6408 
6409 free_fence_ptr:
6410 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6411 
6412 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6413 	return rc;
6414 }
6415 
6416 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6417 {
6418 	if (event_type >= GAUDI_EVENT_SIZE)
6419 		goto event_not_supported;
6420 
6421 	if (!gaudi_irq_map_table[event_type].valid)
6422 		goto event_not_supported;
6423 
6424 	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
6425 
6426 	return;
6427 
6428 event_not_supported:
6429 	snprintf(desc, size, "N/A");
6430 }
6431 
6432 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6433 							bool is_write, u16 *engine_id_1,
6434 							u16 *engine_id_2)
6435 {
6436 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6437 
6438 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6439 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6440 
6441 	switch (x_y) {
6442 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6443 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6444 		dma_id[0] = 0;
6445 		dma_id[1] = 2;
6446 		break;
6447 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6448 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6449 		dma_id[0] = 1;
6450 		dma_id[1] = 3;
6451 		break;
6452 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6453 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6454 		dma_id[0] = 4;
6455 		dma_id[1] = 6;
6456 		break;
6457 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6458 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6459 		dma_id[0] = 5;
6460 		dma_id[1] = 7;
6461 		break;
6462 	default:
6463 		goto unknown_initiator;
6464 	}
6465 
6466 	for (i = 0 ; i < 2 ; i++) {
6467 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6468 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6469 	}
6470 
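	/*
	 * Each of these RAZWI initiator IDs is shared by two DMA engines, so
	 * check the per-engine ERR_CAUSE bits to determine which engine (or
	 * possibly both) triggered the access.
	 */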
6471 	switch (x_y) {
6472 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6473 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6474 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6475 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6476 			return "DMA0";
6477 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6478 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6479 			return "DMA2";
6480 		} else {
6481 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6482 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6483 			return "DMA0 or DMA2";
6484 		}
6485 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6486 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6487 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6488 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6489 			return "DMA1";
6490 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6491 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6492 			return "DMA3";
6493 		} else {
6494 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6495 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6496 			return "DMA1 or DMA3";
6497 		}
6498 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6499 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6500 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6501 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6502 			return "DMA4";
6503 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6504 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6505 			return "DMA6";
6506 		} else {
6507 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6508 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6509 			return "DMA4 or DMA6";
6510 		}
6511 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6512 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6513 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6514 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6515 			return "DMA5";
6516 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6517 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6518 			return "DMA7";
6519 		} else {
6520 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6521 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6522 			return "DMA5 or DMA7";
6523 		}
6524 	}
6525 
6526 unknown_initiator:
6527 	return "unknown initiator";
6528 }
6529 
6530 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6531 							u16 *engine_id_1, u16 *engine_id_2)
6532 {
6533 	u32 val, x_y, axi_id;
6534 
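	/*
	 * The RAZWI ID register encodes the initiator's X/Y routing coordinates
	 * and an AXI ID; together these identify the offending engine.
	 */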
6535 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6536 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6537 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6538 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6539 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6540 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6541 
6542 	switch (x_y) {
6543 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6544 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6545 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6546 			return "TPC0";
6547 		}
6548 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6549 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6550 			return "NIC0";
6551 		}
6552 		break;
6553 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6554 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6555 		return "TPC1";
6556 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6557 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6558 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6559 		return "MME0";
6560 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6561 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6562 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6563 		return "MME1";
6564 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6565 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6566 		return "TPC2";
6567 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6568 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6569 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6570 			return "TPC3";
6571 		}
6572 		/* PCI, CPU and PSOC do not have an engine id */
6573 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6574 			return "PCI";
6575 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6576 			return "CPU";
6577 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6578 			return "PSOC";
6579 		break;
6580 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6581 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6582 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6583 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6584 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6585 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6586 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6587 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6588 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6589 				engine_id_1, engine_id_2);
6590 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6591 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6592 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6593 			return "TPC4";
6594 		}
6595 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6596 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6597 			return "NIC1";
6598 		}
6599 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6600 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6601 			return "NIC2";
6602 		}
6603 		break;
6604 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6605 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6606 		return "TPC5";
6607 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6608 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6609 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6610 		return "MME2";
6611 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6612 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6613 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6614 		return "MME3";
6615 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6616 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6617 		return "TPC6";
6618 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6619 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6620 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6621 			return "TPC7";
6622 		}
6623 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6624 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6625 			return "NIC4";
6626 		}
6627 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6628 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6629 			return "NIC5";
6630 		}
6631 		break;
6632 	default:
6633 		break;
6634 	}
6635 
6636 	dev_err(hdev->dev,
6637 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6638 		val,
6639 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6640 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6641 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6642 			RAZWI_INITIATOR_AXI_ID_MASK);
6643 
6644 	return "unknown initiator";
6645 }
6646 
6647 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6648 						u16 *engine_id_2, bool *is_read, bool *is_write)
6649 {
6651 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6652 		dev_err_ratelimited(hdev->dev,
6653 			"RAZWI event caused by illegal write of %s\n",
6654 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6655 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6656 		*is_write = true;
6657 	}
6658 
6659 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6660 		dev_err_ratelimited(hdev->dev,
6661 			"RAZWI event caused by illegal read of %s\n",
6662 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6663 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6664 		*is_read = true;
6665 	}
6666 }
6667 
6668 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6669 {
6670 	struct gaudi_device *gaudi = hdev->asic_specific;
6671 	u32 val;
6672 
6673 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6674 		return;
6675 
6676 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6677 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6678 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6679 		*addr <<= 32;
6680 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6681 
6682 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6683 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6684 
6685 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6686 	}
6687 
6688 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6689 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6690 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6691 		*addr <<= 32;
6692 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6693 
6694 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6695 
6696 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6697 	}
6698 }
6699 
6700 /*
6701  *  +-------------------+------------------------------------------------------+
6702  *  | Configuration Reg |                     Description                      |
6703  *  |      Address      |                                                      |
6704  *  +-------------------+------------------------------------------------------+
6705  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6706  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6707  *  |                   |0xF34 memory wrappers 63:32                           |
6708  *  |                   |0xF38 memory wrappers 95:64                           |
6709  *  |                   |0xF3C memory wrappers 127:96                          |
6710  *  +-------------------+------------------------------------------------------+
6711  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6712  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6713  *  |                   |0xF44 memory wrappers 63:32                           |
6714  *  |                   |0xF48 memory wrappers 95:64                           |
6715  *  |                   |0xF4C memory wrappers 127:96                          |
6716  *  +-------------------+------------------------------------------------------+
6717  */
6718 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6719 		struct ecc_info_extract_params *params, u64 *ecc_address,
6720 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6721 {
6722 	u32 i, num_mem_regs, reg, err_bit;
6723 	u64 err_addr, err_word = 0;
6724 
6725 	num_mem_regs = params->num_memories / 32 +
6726 			((params->num_memories % 32) ? 1 : 0);
6727 
6728 	if (params->block_address >= CFG_BASE)
6729 		params->block_address -= CFG_BASE;
6730 
6731 	if (params->derr)
6732 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6733 	else
6734 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
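	/*
	 * err_addr now points at the first of num_mem_regs consecutive 32-bit
	 * error-indication registers (see the table above).
	 */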
6735 
6736 	/* Set invalid wrapper index */
6737 	*memory_wrapper_idx = 0xFF;
6738 
6739 	/* Iterate through memory wrappers, a single bit must be set */
6740 	for (i = 0 ; i < num_mem_regs ; i++) {
6741 		err_word = RREG32(err_addr + i * 4);
6743 		if (err_word) {
6744 			err_bit = __ffs(err_word);
6745 			*memory_wrapper_idx = err_bit + (32 * i);
6746 			break;
6747 		}
6748 	}
6749 
6750 	if (*memory_wrapper_idx == 0xFF) {
6751 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6752 		return -EINVAL;
6753 	}
6754 
6755 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6756 			*memory_wrapper_idx);
6757 
6758 	*ecc_address =
6759 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6760 	*ecc_syndrom =
6761 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6762 
6763 	/* Clear error indication */
6764 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6765 	if (params->derr)
6766 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6767 	else
6768 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6769 
6770 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6771 
6772 	return 0;
6773 }
6774 
6775 /*
6776  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6777  *
6778  * @idx: the current pi/ci value
6779  * @q_len: the queue length (power of 2)
6780  *
6781  * @return the cyclically decremented index
6782  */
6783 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6784 {
6785 	u32 mask = q_len - 1;
6786 
6787 	/*
6788 	 * Modular decrement is equivalent to adding (q_len - 1);
6789 	 * masking with the LSBs afterwards keeps the value in the
6790 	 * range [0, q_len - 1].
6791 	 */
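	/* e.g. for q_len = 8: idx 0 -> 7, idx 5 -> 4 */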
6792 	return (idx + q_len - 1) & mask;
6793 }
6794 
6795 /**
6796  * gaudi_handle_sw_config_stream_data - print SW config stream data
6797  *
6798  * @hdev: pointer to the habanalabs device structure
6799  * @stream: the QMAN's stream
6800  * @qman_base: base address of QMAN registers block
6801  * @event_mask: mask of the last events occurred
6802  */
6803 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6804 						u64 qman_base, u64 event_mask)
6805 {
6806 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6807 	u32 cq_ptr_lo_off, size;
6808 
6809 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6810 
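	/*
	 * All QMANs share the same register layout, so the TPC0_QM offsets are
	 * used relative to qman_base to locate this stream's CQ pointer and
	 * size registers.
	 */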
6811 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6812 						stream * cq_ptr_lo_off;
6813 	cq_ptr_hi = cq_ptr_lo +
6814 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6815 	cq_tsize = cq_ptr_lo +
6816 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6817 
6818 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6819 	size = RREG32(cq_tsize);
6820 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6821 							stream, cq_ptr, size);
6822 
6823 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6824 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6825 		hdev->captured_err_info.undef_opcode.cq_size = size;
6826 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6827 	}
6828 }
6829 
6830 /**
6831  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6832  *
6833  * @hdev: pointer to the habanalabs device structure
6834  * @qid_base: first QID of the QMAN (out of 4 streams)
6835  * @stream: the QMAN's stream
6836  * @qman_base: base address of QMAN registers block
6837  * @event_mask: mask of the last events occurred
6838  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6839  */
6840 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6841 						u32 stream, u64 qman_base,
6842 						u64 event_mask,
6843 						bool pr_sw_conf)
6844 {
6845 	u32 ci, qm_ci_stream_off, queue_len;
6846 	struct hl_hw_queue *q;
6847 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6848 	int i;
6849 
6850 	q = &hdev->kernel_queues[qid_base + stream];
6851 
6852 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6853 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6854 						stream * qm_ci_stream_off;
6855 
6856 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6857 					q->int_queue_len : HL_QUEUE_LENGTH;
6858 
6859 	hdev->asic_funcs->hw_queues_lock(hdev);
6860 
6861 	if (pr_sw_conf)
6862 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6863 
6864 	ci = RREG32(pq_ci);
6865 
6866 	/* we should start printing from ci - 1 */
6867 	ci = gaudi_queue_idx_dec(ci, queue_len);
6868 	memset(addr, 0, sizeof(addr));
6869 
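	/*
	 * Walk backwards over up to PQ_FETCHER_CACHE_SIZE of the most recently
	 * fetched PQ entries, stopping at the first uninitialized BD.
	 */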
6870 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6871 		struct hl_bd *bd;
6872 		u32 len;
6873 
6874 		bd = q->kernel_address;
6875 		bd += ci;
6876 
6877 		len = le32_to_cpu(bd->len);
6878 		/* len 0 means an uninitialized entry - stop here */
6879 		if (!len)
6880 			break;
6881 
6882 		addr[i] = le64_to_cpu(bd->ptr);
6883 
6884 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6885 							stream, ci, addr[i], len);
6886 
6887 		/* get previous ci, wrap if needed */
6888 		ci = gaudi_queue_idx_dec(ci, queue_len);
6889 	}
6890 
6891 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6892 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6893 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6894 
6895 		if (arr_idx == 0) {
6896 			undef_opcode->timestamp = ktime_get();
6897 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6898 		}
6899 
6900 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6901 		undef_opcode->cb_addr_streams_len++;
6902 	}
6903 
6904 	hdev->asic_funcs->hw_queues_unlock(hdev);
6905 }
6906 
6907 /**
6908  * handle_qman_data_on_err - extract QMAN data on error
6909  *
6910  * @hdev: pointer to the habanalabs device structure
6911  * @qid_base: first QID of the QMAN (out of 4 streams)
6912  * @stream: the QMAN's stream
6913  * @qman_base: base address of QMAN registers block
6914  * @event_mask: mask of the last events occurred
6915  *
6916  * This function attempts to extract as much data as possible on a QMAN error.
6917  * For an upper CP, print the SW config stream data and the last 8 PQEs.
6918  * For the lower CP, print the SW config data and the last PQEs of all 4 upper CPs.
6919  */
6920 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6921 				   u32 stream, u64 qman_base, u64 event_mask)
6922 {
6923 	u32 i;
6924 
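	/* A stream index equal to QMAN_STREAMS denotes the lower CP */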
6925 	if (stream != QMAN_STREAMS) {
6926 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6927 			qman_base, event_mask, true);
6928 		return;
6929 	}
6930 
6931 	/* handle Lower-CP */
6932 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6933 
6934 	for (i = 0; i < QMAN_STREAMS; i++)
6935 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6936 			qman_base, event_mask, false);
6937 }
6938 
6939 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6940 					  const char *qm_name,
6941 					  u64 qman_base,
6942 					  u32 qid_base,
6943 					  u64 *event_mask)
6944 {
6945 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6946 	u64 glbl_sts_addr, arb_err_addr;
6947 	char reg_desc[32];
6948 
6949 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6950 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6951 
6952 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6953 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6954 		glbl_sts_clr_val = 0;
6955 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6956 
6957 		if (!glbl_sts_val)
6958 			continue;
6959 
6960 		if (i == QMAN_STREAMS)
6961 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6962 		else
6963 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6964 
6965 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6966 			if (glbl_sts_val & BIT(j)) {
6967 				dev_err_ratelimited(hdev->dev,
6968 						"%s %s. err cause: %s\n",
6969 						qm_name, reg_desc,
6970 						gaudi_qman_error_cause[j]);
6971 				glbl_sts_clr_val |= BIT(j);
6972 			}
6973 		}
6974 		/* check for undefined opcode */
6975 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6976 				hdev->captured_err_info.undef_opcode.write_enable) {
6977 			memset(&hdev->captured_err_info.undef_opcode, 0,
6978 						sizeof(hdev->captured_err_info.undef_opcode));
6979 
6980 			hdev->captured_err_info.undef_opcode.write_enable = false;
6981 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6982 		}
6983 
6984 		/* Write 1 to clear errors */
6985 		if (!hdev->stop_on_err)
6986 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6987 		else
6988 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6989 	}
6990 
6991 	arb_err_val = RREG32(arb_err_addr);
6992 
6993 	if (!arb_err_val)
6994 		return;
6995 
6996 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6997 		if (arb_err_val & BIT(j)) {
6998 			dev_err_ratelimited(hdev->dev,
6999 					"%s ARB_ERR. err cause: %s\n",
7000 					qm_name,
7001 					gaudi_qman_arb_error_cause[j]);
7002 		}
7003 	}
7004 }
7005 
7006 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7007 		struct hl_eq_sm_sei_data *sei_data)
7008 {
7009 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7010 
7011 	/* Flip the bits as the enum is ordered in the opposite way */
7012 	index = (index ^ 0x3) & 0x3;
7013 
7014 	switch (sei_data->sei_cause) {
7015 	case SM_SEI_SO_OVERFLOW:
7016 		dev_err_ratelimited(hdev->dev,
7017 			"%s SEI Error: SOB Group %u overflow/underflow",
7018 			gaudi_sync_manager_names[index],
7019 			le32_to_cpu(sei_data->sei_log));
7020 		break;
7021 	case SM_SEI_LBW_4B_UNALIGNED:
7022 		dev_err_ratelimited(hdev->dev,
7023 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7024 			gaudi_sync_manager_names[index],
7025 			le32_to_cpu(sei_data->sei_log));
7026 		break;
7027 	case SM_SEI_AXI_RESPONSE_ERR:
7028 		dev_err_ratelimited(hdev->dev,
7029 			"%s SEI Error: AXI ID %u response error",
7030 			gaudi_sync_manager_names[index],
7031 			le32_to_cpu(sei_data->sei_log));
7032 		break;
7033 	default:
7034 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7035 				le32_to_cpu(sei_data->sei_log));
7036 		break;
7037 	}
7038 }
7039 
7040 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7041 		struct hl_eq_ecc_data *ecc_data)
7042 {
7043 	struct ecc_info_extract_params params;
7044 	u64 ecc_address = 0, ecc_syndrom = 0;
7045 	u8 index, memory_wrapper_idx = 0;
7046 	bool extract_info_from_fw;
7047 	int rc;
7048 
7049 	if (hdev->asic_prop.fw_security_enabled) {
7050 		extract_info_from_fw = true;
7051 		goto extract_ecc_info;
7052 	}
7053 
7054 	switch (event_type) {
7055 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7056 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7057 		extract_info_from_fw = true;
7058 		break;
7059 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7060 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7061 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7062 		params.num_memories = 90;
7063 		params.derr = false;
7064 		extract_info_from_fw = false;
7065 		break;
7066 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7067 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7068 		params.block_address =
7069 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7070 		params.num_memories = 90;
7071 		params.derr = true;
7072 		extract_info_from_fw = false;
7073 		break;
7074 	case GAUDI_EVENT_MME0_ACC_SERR:
7075 	case GAUDI_EVENT_MME1_ACC_SERR:
7076 	case GAUDI_EVENT_MME2_ACC_SERR:
7077 	case GAUDI_EVENT_MME3_ACC_SERR:
7078 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7079 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7080 		params.num_memories = 128;
7081 		params.derr = false;
7082 		extract_info_from_fw = false;
7083 		break;
7084 	case GAUDI_EVENT_MME0_ACC_DERR:
7085 	case GAUDI_EVENT_MME1_ACC_DERR:
7086 	case GAUDI_EVENT_MME2_ACC_DERR:
7087 	case GAUDI_EVENT_MME3_ACC_DERR:
7088 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7089 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7090 		params.num_memories = 128;
7091 		params.derr = true;
7092 		extract_info_from_fw = false;
7093 		break;
7094 	case GAUDI_EVENT_MME0_SBAB_SERR:
7095 	case GAUDI_EVENT_MME1_SBAB_SERR:
7096 	case GAUDI_EVENT_MME2_SBAB_SERR:
7097 	case GAUDI_EVENT_MME3_SBAB_SERR:
7098 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7099 		params.block_address =
7100 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7101 		params.num_memories = 33;
7102 		params.derr = false;
7103 		extract_info_from_fw = false;
7104 		break;
7105 	case GAUDI_EVENT_MME0_SBAB_DERR:
7106 	case GAUDI_EVENT_MME1_SBAB_DERR:
7107 	case GAUDI_EVENT_MME2_SBAB_DERR:
7108 	case GAUDI_EVENT_MME3_SBAB_DERR:
7109 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7110 		params.block_address =
7111 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7112 		params.num_memories = 33;
7113 		params.derr = true;
7114 		extract_info_from_fw = false;
7115 		break;
7116 	default:
7117 		return;
7118 	}
7119 
7120 extract_ecc_info:
7121 	if (extract_info_from_fw) {
7122 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7123 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7124 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7125 	} else {
7126 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7127 				&ecc_syndrom, &memory_wrapper_idx);
7128 		if (rc)
7129 			return;
7130 	}
7131 
7132 	dev_err(hdev->dev,
7133 		"ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n",
7134 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7135 }
7136 
7137 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7138 {
7139 	u64 qman_base;
7140 	char desc[32];
7141 	u32 qid_base;
7142 	u8 index;
7143 
7144 	switch (event_type) {
7145 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7146 		index = event_type - GAUDI_EVENT_TPC0_QM;
7147 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7148 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7149 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7150 		break;
7151 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7152 		if (event_type == GAUDI_EVENT_MME0_QM) {
7153 			index = 0;
7154 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7155 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7156 			index = 2;
7157 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7158 		}
7159 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7160 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7161 		break;
7162 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7163 		index = event_type - GAUDI_EVENT_DMA0_QM;
7164 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7165 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7166 		if (index > 1)
7167 			qid_base++;
7168 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7169 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7170 		break;
7171 	case GAUDI_EVENT_NIC0_QM0:
7172 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7173 		qman_base = mmNIC0_QM0_BASE;
7174 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7175 		break;
7176 	case GAUDI_EVENT_NIC0_QM1:
7177 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7178 		qman_base = mmNIC0_QM1_BASE;
7179 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7180 		break;
7181 	case GAUDI_EVENT_NIC1_QM0:
7182 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7183 		qman_base = mmNIC1_QM0_BASE;
7184 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7185 		break;
7186 	case GAUDI_EVENT_NIC1_QM1:
7187 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7188 		qman_base = mmNIC1_QM1_BASE;
7189 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7190 		break;
7191 	case GAUDI_EVENT_NIC2_QM0:
7192 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7193 		qman_base = mmNIC2_QM0_BASE;
7194 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7195 		break;
7196 	case GAUDI_EVENT_NIC2_QM1:
7197 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7198 		qman_base = mmNIC2_QM1_BASE;
7199 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7200 		break;
7201 	case GAUDI_EVENT_NIC3_QM0:
7202 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7203 		qman_base = mmNIC3_QM0_BASE;
7204 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7205 		break;
7206 	case GAUDI_EVENT_NIC3_QM1:
7207 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7208 		qman_base = mmNIC3_QM1_BASE;
7209 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7210 		break;
7211 	case GAUDI_EVENT_NIC4_QM0:
7212 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7213 		qman_base = mmNIC4_QM0_BASE;
7214 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7215 		break;
7216 	case GAUDI_EVENT_NIC4_QM1:
7217 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7218 		qman_base = mmNIC4_QM1_BASE;
7219 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7220 		break;
7221 	default:
7222 		return;
7223 	}
7224 
7225 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7226 }
7227 
7228 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7229 					bool check_razwi, u64 *event_mask)
7230 {
7231 	bool is_read = false, is_write = false;
7232 	u16 engine_id[2], num_of_razwi_eng = 0;
7233 	char desc[64] = "";
7234 	u64 razwi_addr = 0;
7235 	u8 razwi_flags = 0;
7236 
7237 	/*
7238 	 * Initialize the engine ids as invalid; they are assigned valid values only if the
7239 	 * RAZWI was initiated by an engine that has an engine id.
7240 	 */
7241 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7242 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7243 
7244 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7245 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7246 		event_type, desc);
7247 
7248 	if (check_razwi) {
7249 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7250 						&is_write);
7251 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7252 
7253 		if (is_read)
7254 			razwi_flags |= HL_RAZWI_READ;
7255 		if (is_write)
7256 			razwi_flags |= HL_RAZWI_WRITE;
7257 
7258 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7259 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7260 				num_of_razwi_eng = 2;
7261 			else
7262 				num_of_razwi_eng = 1;
7263 		}
7264 
7265 		if (razwi_flags)
7266 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7267 					razwi_flags, event_mask);
7268 	}
7269 }
7270 
7271 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7272 					struct cpucp_pkt_sync_err *sync_err)
7273 {
7274 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7275 
7276 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7277 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7278 }
7279 
7280 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7281 					struct hl_eq_fw_alive *fw_alive)
7282 {
7283 	dev_err(hdev->dev,
7284 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7285 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7286 		le32_to_cpu(fw_alive->process_id),
7287 		le32_to_cpu(fw_alive->thread_id),
7288 		le64_to_cpu(fw_alive->uptime_seconds));
7289 }
7290 
7291 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7292 						void *data)
7293 {
7294 	char desc[64] = "", *type;
7295 	struct eq_nic_sei_event *eq_nic_sei = data;
7296 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7297 
7298 	switch (eq_nic_sei->axi_error_cause) {
7299 	case RXB:
7300 		type = "RXB";
7301 		break;
7302 	case RXE:
7303 		type = "RXE";
7304 		break;
7305 	case TXS:
7306 		type = "TXS";
7307 		break;
7308 	case TXE:
7309 		type = "TXE";
7310 		break;
7311 	case QPC_RESP:
7312 		type = "QPC_RESP";
7313 		break;
7314 	case NON_AXI_ERR:
7315 		type = "NON_AXI_ERR";
7316 		break;
7317 	case TMR:
7318 		type = "TMR";
7319 		break;
7320 	default:
7321 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7322 			eq_nic_sei->axi_error_cause);
7323 		type = "N/A";
7324 		break;
7325 	}
7326 
7327 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7328 			eq_nic_sei->id);
7329 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7330 		event_type, desc);
7331 }
7332 
7333 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7334 {
7335 	/* GAUDI doesn't support any reset except hard-reset */
7336 	return -EPERM;
7337 }
7338 
7339 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7340 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7341 {
7342 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7343 	int rc = 0;
7344 
7345 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7346 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7347 		if (!hbm_ecc_data) {
7348 			dev_err(hdev->dev, "No FW ECC data\n");
7349 			return 0;
7350 		}
7351 
7352 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7353 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7354 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7355 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7356 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7357 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7358 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7359 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7360 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7361 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7362 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7363 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7364 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7365 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7366 
7367 		dev_err(hdev->dev,
7368 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7369 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7370 		dev_err(hdev->dev,
7371 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7372 			device, ch, hbm_ecc_data->first_addr, type,
7373 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7374 			hbm_ecc_data->dec_cnt);
7375 		return 0;
7376 	}
7377 
7378 	if (hdev->asic_prop.fw_security_enabled) {
7379 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7380 		return 0;
7381 	}
7382 
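	/*
	 * No ECC data from FW - read the HBM MC registers directly. Each
	 * channel has a 0x1000-byte register block that reports on two
	 * pseudo-channels.
	 */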
7383 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7384 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7385 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7386 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7387 		if (val) {
7388 			rc = -EIO;
7389 			dev_err(hdev->dev,
7390 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7391 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7392 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7393 				(val >> 4) & 0x1);
7394 
7395 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7396 			dev_err(hdev->dev,
7397 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7398 				device, ch * 2,
7399 				RREG32(base + ch * 0x1000 + 0x064),
7400 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7401 				(val2 & 0xFF0000) >> 16,
7402 				(val2 & 0xFF000000) >> 24);
7403 		}
7404 
7405 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7406 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7407 		if (val) {
7408 			rc = -EIO;
7409 			dev_err(hdev->dev,
7410 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7411 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7412 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7413 				(val >> 4) & 0x1);
7414 
7415 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7416 			dev_err(hdev->dev,
7417 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7418 				device, ch * 2 + 1,
7419 				RREG32(base + ch * 0x1000 + 0x074),
7420 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7421 				(val2 & 0xFF0000) >> 16,
7422 				(val2 & 0xFF000000) >> 24);
7423 		}
7424 
7425 		/* Clear interrupts */
7426 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7427 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7428 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7429 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7430 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7431 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7432 	}
7433 
7434 	val  = RREG32(base + 0x8F30);
7435 	val2 = RREG32(base + 0x8F34);
7436 	if (val | val2) {
7437 		rc = -EIO;
7438 		dev_err(hdev->dev,
7439 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7440 			device, val, val2);
7441 	}
7442 	val  = RREG32(base + 0x8F40);
7443 	val2 = RREG32(base + 0x8F44);
7444 	if (val | val2) {
7445 		rc = -EIO;
7446 		dev_err(hdev->dev,
7447 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7448 			device, val, val2);
7449 	}
7450 
7451 	return rc;
7452 }
7453 
7454 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7455 {
7456 	switch (hbm_event_type) {
7457 	case GAUDI_EVENT_HBM0_SPI_0:
7458 	case GAUDI_EVENT_HBM0_SPI_1:
7459 		return 0;
7460 	case GAUDI_EVENT_HBM1_SPI_0:
7461 	case GAUDI_EVENT_HBM1_SPI_1:
7462 		return 1;
7463 	case GAUDI_EVENT_HBM2_SPI_0:
7464 	case GAUDI_EVENT_HBM2_SPI_1:
7465 		return 2;
7466 	case GAUDI_EVENT_HBM3_SPI_0:
7467 	case GAUDI_EVENT_HBM3_SPI_1:
7468 		return 3;
7469 	default:
7470 		break;
7471 	}
7472 
7473 	/* Should never happen */
7474 	return 0;
7475 }
7476 
7477 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7478 					char *interrupt_name)
7479 {
7480 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7481 	bool soft_reset_required = false;
7482 
7483 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7484 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7485 
7486 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7487 		if (tpc_interrupts_cause & BIT(i)) {
7488 			dev_err_ratelimited(hdev->dev,
7489 					"TPC%d_%s interrupt cause: %s\n",
7490 					tpc_id, interrupt_name,
7491 					gaudi_tpc_interrupts_cause[i]);
7492 			/* If this is a QM error, a soft-reset is required */
7493 			if (i == 15)
7494 				soft_reset_required = true;
7495 		}
7496 
7497 	/* Clear interrupts */
7498 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7499 
7500 	return soft_reset_required;
7501 }
7502 
7503 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7504 {
7505 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7506 }
7507 
7508 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7509 {
7510 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7511 }
7512 
7513 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7514 {
7515 	ktime_t zero_time = ktime_set(0, 0);
7516 
7517 	mutex_lock(&hdev->clk_throttling.lock);
7518 
7519 	switch (event_type) {
7520 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7521 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7522 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7523 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7524 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7525 		dev_info_ratelimited(hdev->dev,
7526 			"Clock throttling due to power consumption\n");
7527 		break;
7528 
7529 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7530 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7531 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7532 		dev_info_ratelimited(hdev->dev,
7533 			"Power envelope is safe, back to optimal clock\n");
7534 		break;
7535 
7536 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7537 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7538 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7539 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7540 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7541 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7542 		dev_info_ratelimited(hdev->dev,
7543 			"Clock throttling due to overheating\n");
7544 		break;
7545 
7546 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7547 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7548 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7549 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7550 		dev_info_ratelimited(hdev->dev,
7551 			"Thermal envelope is safe, back to optimal clock\n");
7552 		break;
7553 
7554 	default:
7555 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7556 			event_type);
7557 		break;
7558 	}
7559 
7560 	mutex_unlock(&hdev->clk_throttling.lock);
7561 }
7562 
7563 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7564 {
7565 	struct gaudi_device *gaudi = hdev->asic_specific;
7566 	struct hl_info_fw_err_info fw_err_info;
7567 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7568 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7569 	u32 fw_fatal_err_flag = 0, flags = 0;
7570 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7571 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7572 	bool reset_required, reset_direct = false;
7573 	u8 cause;
7574 	int rc;
7575 
7576 	if (event_type >= GAUDI_EVENT_SIZE) {
7577 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7578 				event_type, GAUDI_EVENT_SIZE - 1);
7579 		return;
7580 	}
7581 
7582 	gaudi->events_stat[event_type]++;
7583 	gaudi->events_stat_aggregate[event_type]++;
7584 
7585 	switch (event_type) {
7586 	case GAUDI_EVENT_PCIE_CORE_DERR:
7587 	case GAUDI_EVENT_PCIE_IF_DERR:
7588 	case GAUDI_EVENT_PCIE_PHY_DERR:
7589 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7590 	case GAUDI_EVENT_MME0_ACC_DERR:
7591 	case GAUDI_EVENT_MME0_SBAB_DERR:
7592 	case GAUDI_EVENT_MME1_ACC_DERR:
7593 	case GAUDI_EVENT_MME1_SBAB_DERR:
7594 	case GAUDI_EVENT_MME2_ACC_DERR:
7595 	case GAUDI_EVENT_MME2_SBAB_DERR:
7596 	case GAUDI_EVENT_MME3_ACC_DERR:
7597 	case GAUDI_EVENT_MME3_SBAB_DERR:
7598 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7599 		fallthrough;
7600 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7601 	case GAUDI_EVENT_PSOC_MEM_DERR:
7602 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7603 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7604 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7605 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7606 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7607 	case GAUDI_EVENT_MMU_DERR:
7608 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7609 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7610 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7611 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7612 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7613 		goto reset_device;
7614 
7615 	case GAUDI_EVENT_GIC500:
7616 	case GAUDI_EVENT_AXI_ECC:
7617 	case GAUDI_EVENT_L2_RAM_ECC:
7618 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7619 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7620 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7621 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7622 		goto reset_device;
7623 
7624 	case GAUDI_EVENT_HBM0_SPI_0:
7625 	case GAUDI_EVENT_HBM1_SPI_0:
7626 	case GAUDI_EVENT_HBM2_SPI_0:
7627 	case GAUDI_EVENT_HBM3_SPI_0:
7628 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7629 		gaudi_hbm_read_interrupts(hdev,
7630 				gaudi_hbm_event_to_dev(event_type),
7631 				&eq_entry->hbm_ecc_data);
7632 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7633 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7634 		goto reset_device;
7635 
7636 	case GAUDI_EVENT_HBM0_SPI_1:
7637 	case GAUDI_EVENT_HBM1_SPI_1:
7638 	case GAUDI_EVENT_HBM2_SPI_1:
7639 	case GAUDI_EVENT_HBM3_SPI_1:
7640 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7641 		gaudi_hbm_read_interrupts(hdev,
7642 				gaudi_hbm_event_to_dev(event_type),
7643 				&eq_entry->hbm_ecc_data);
7644 		hl_fw_unmask_irq(hdev, event_type);
7645 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7646 		break;
7647 
7648 	case GAUDI_EVENT_TPC0_DEC:
7649 	case GAUDI_EVENT_TPC1_DEC:
7650 	case GAUDI_EVENT_TPC2_DEC:
7651 	case GAUDI_EVENT_TPC3_DEC:
7652 	case GAUDI_EVENT_TPC4_DEC:
7653 	case GAUDI_EVENT_TPC5_DEC:
7654 	case GAUDI_EVENT_TPC6_DEC:
7655 	case GAUDI_EVENT_TPC7_DEC:
7656 		/* On a TPC DEC event, notify on a TPC assertion. There is no
7657 		 * dedicated assertion event yet, so the FW generates a TPC DEC event instead.
7658 		 * The SW upper layer inspects an internal mapped area to determine
7659 		 * whether the event is a TPC assertion or a "real" TPC DEC.
7660 		 */
7661 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7662 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7663 		reset_required = gaudi_tpc_read_interrupts(hdev,
7664 					tpc_dec_event_to_tpc_id(event_type),
7665 					"AXI_SLV_DEC_Error");
7666 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7667 		if (reset_required) {
7668 			dev_err(hdev->dev, "reset required due to %s\n",
7669 				gaudi_irq_map_table[event_type].name);
7670 
7671 			reset_direct = true;
7672 			goto reset_device;
7673 		} else {
7674 			hl_fw_unmask_irq(hdev, event_type);
7675 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7676 		}
7677 		break;
7678 
7679 	case GAUDI_EVENT_TPC0_KRN_ERR:
7680 	case GAUDI_EVENT_TPC1_KRN_ERR:
7681 	case GAUDI_EVENT_TPC2_KRN_ERR:
7682 	case GAUDI_EVENT_TPC3_KRN_ERR:
7683 	case GAUDI_EVENT_TPC4_KRN_ERR:
7684 	case GAUDI_EVENT_TPC5_KRN_ERR:
7685 	case GAUDI_EVENT_TPC6_KRN_ERR:
7686 	case GAUDI_EVENT_TPC7_KRN_ERR:
7687 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7688 		reset_required = gaudi_tpc_read_interrupts(hdev,
7689 					tpc_krn_event_to_tpc_id(event_type),
7690 					"KRN_ERR");
7691 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7692 		if (reset_required) {
7693 			dev_err(hdev->dev, "reset required due to %s\n",
7694 				gaudi_irq_map_table[event_type].name);
7695 
7696 			reset_direct = true;
7697 			goto reset_device;
7698 		} else {
7699 			hl_fw_unmask_irq(hdev, event_type);
7700 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7701 		}
7702 		break;
7703 
7704 	case GAUDI_EVENT_PCIE_CORE_SERR:
7705 	case GAUDI_EVENT_PCIE_IF_SERR:
7706 	case GAUDI_EVENT_PCIE_PHY_SERR:
7707 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7708 	case GAUDI_EVENT_MME0_ACC_SERR:
7709 	case GAUDI_EVENT_MME0_SBAB_SERR:
7710 	case GAUDI_EVENT_MME1_ACC_SERR:
7711 	case GAUDI_EVENT_MME1_SBAB_SERR:
7712 	case GAUDI_EVENT_MME2_ACC_SERR:
7713 	case GAUDI_EVENT_MME2_SBAB_SERR:
7714 	case GAUDI_EVENT_MME3_ACC_SERR:
7715 	case GAUDI_EVENT_MME3_SBAB_SERR:
7716 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7717 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7718 	case GAUDI_EVENT_PSOC_MEM_SERR:
7719 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7720 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7721 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7722 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7723 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7724 		fallthrough;
7725 	case GAUDI_EVENT_MMU_SERR:
7726 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7727 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7728 		hl_fw_unmask_irq(hdev, event_type);
7729 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7730 		break;
7731 
7732 	case GAUDI_EVENT_PCIE_DEC:
7733 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7734 	case GAUDI_EVENT_PSOC_AXI_DEC:
7735 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7736 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7737 		hl_fw_unmask_irq(hdev, event_type);
7738 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7739 		break;
7740 
7741 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7742 	case GAUDI_EVENT_MMU_WR_PERM:
7743 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7744 		hl_fw_unmask_irq(hdev, event_type);
7745 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7746 		break;
7747 
7748 	case GAUDI_EVENT_MME0_WBC_RSP:
7749 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7750 	case GAUDI_EVENT_MME1_WBC_RSP:
7751 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7752 	case GAUDI_EVENT_MME2_WBC_RSP:
7753 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7754 	case GAUDI_EVENT_MME3_WBC_RSP:
7755 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7756 	case GAUDI_EVENT_RAZWI_OR_ADC:
7757 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7758 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7759 		fallthrough;
7760 	case GAUDI_EVENT_NIC0_QM0:
7761 	case GAUDI_EVENT_NIC0_QM1:
7762 	case GAUDI_EVENT_NIC1_QM0:
7763 	case GAUDI_EVENT_NIC1_QM1:
7764 	case GAUDI_EVENT_NIC2_QM0:
7765 	case GAUDI_EVENT_NIC2_QM1:
7766 	case GAUDI_EVENT_NIC3_QM0:
7767 	case GAUDI_EVENT_NIC3_QM1:
7768 	case GAUDI_EVENT_NIC4_QM0:
7769 	case GAUDI_EVENT_NIC4_QM1:
7770 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7771 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7772 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7773 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7774 		hl_fw_unmask_irq(hdev, event_type);
7775 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7776 		break;
7777 
7778 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7779 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7780 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7781 		goto reset_device;
7782 
7783 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7784 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7785 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7786 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7787 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7788 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7789 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7790 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7791 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7792 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7793 		hl_fw_unmask_irq(hdev, event_type);
7794 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7795 		break;
7796 
7797 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7798 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7799 		hl_fw_unmask_irq(hdev, event_type);
7800 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7801 		break;
7802 
7803 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7804 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7805 		gaudi_print_sm_sei_info(hdev, event_type,
7806 					&eq_entry->sm_sei_data);
7807 		rc = hl_state_dump(hdev);
7808 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7809 		if (rc)
7810 			dev_err(hdev->dev,
7811 				"Error during system state dump %d\n", rc);
7812 		hl_fw_unmask_irq(hdev, event_type);
7813 		break;
7814 
7815 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7816 		break;
7817 
7818 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7819 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7820 		hl_fw_unmask_irq(hdev, event_type);
7821 		break;
7822 
7823 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7824 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7825 		dev_err(hdev->dev,
7826 			"Received high temp H/W interrupt %d (cause %d)\n",
7827 			event_type, cause);
7828 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7829 		break;
7830 
7831 	case GAUDI_EVENT_DEV_RESET_REQ:
7832 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7833 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7834 		goto reset_device;
7835 
7836 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7837 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7838 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7839 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7840 		goto reset_device;
7841 
7842 	case GAUDI_EVENT_FW_ALIVE_S:
7843 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7844 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7845 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7846 		fw_err_info.event_id = event_type;
7847 		fw_err_info.event_mask = &event_mask;
7848 		hl_handle_fw_err(hdev, &fw_err_info);
7849 		goto reset_device;
7850 
7851 	default:
7852 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7853 				event_type);
7854 		break;
7855 	}
7856 
7857 	if (event_mask)
7858 		hl_notifier_event_send_all(hdev, event_mask);
7859 
7860 	return;
7861 
7862 reset_device:
7863 	reset_required = true;
7864 
7865 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7866 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7867 
7868 		/* notify on device unavailable while the reset is triggered by FW */
7869 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7870 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7871 	} else if (hdev->hard_reset_on_fw_events) {
7872 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7873 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7874 	} else {
7875 		reset_required = false;
7876 	}
7877 
7878 	if (reset_required) {
7879 		/* escalate general hw errors to critical/fatal error */
7880 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7881 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7882 
7883 		hl_device_cond_reset(hdev, flags, event_mask);
7884 	} else {
7885 		hl_fw_unmask_irq(hdev, event_type);
7886 		/* The notification on the event still needs to be sent even though no reset is executed */
7887 		if (event_mask)
7888 			hl_notifier_event_send_all(hdev, event_mask);
7889 	}
7890 }
7891 
7892 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7893 {
7894 	struct gaudi_device *gaudi = hdev->asic_specific;
7895 
7896 	if (aggregate) {
7897 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7898 		return gaudi->events_stat_aggregate;
7899 	}
7900 
7901 	*size = (u32) sizeof(gaudi->events_stat);
7902 	return gaudi->events_stat;
7903 }
7904 
7905 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7906 {
7907 	struct gaudi_device *gaudi = hdev->asic_specific;
7908 	u32 status, timeout_usec;
7909 	int rc;
7910 
7911 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7912 		hdev->reset_info.hard_reset_pending)
7913 		return 0;
7914 
7915 	if (hdev->pldm)
7916 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7917 	else
7918 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7919 
7920 	/* L0 & L1 invalidation */
7921 	WREG32(mmSTLB_INV_PS, 3);
7922 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7923 	WREG32(mmSTLB_INV_PS, 2);
7924 
7925 	rc = hl_poll_timeout(
7926 		hdev,
7927 		mmSTLB_INV_PS,
7928 		status,
7929 		!status,
7930 		1000,
7931 		timeout_usec);
7932 
7933 	WREG32(mmSTLB_INV_SET, 0);
7934 
7935 	return rc;
7936 }
7937 
7938 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7939 						bool is_hard, u32 flags,
7940 						u32 asid, u64 va, u64 size)
7941 {
7942 	/* Treat as invalidate all because there is no range invalidation
7943 	 * in Gaudi
7944 	 */
7945 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7946 }
7947 
7948 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7949 {
7950 	u32 status, timeout_usec;
7951 	int rc;
7952 
7953 	if (hdev->pldm)
7954 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7955 	else
7956 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7957 
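	/*
	 * Program the hop0 physical address of the ASID - the address is split
	 * across two registers (bits 43:12 and 49:44) - then kick the MMU via
	 * MMU_BUSY and poll until it consumes the update.
	 */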
7958 	WREG32(MMU_ASID, asid);
7959 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7960 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7961 	WREG32(MMU_BUSY, 0x80000000);
7962 
7963 	rc = hl_poll_timeout(
7964 		hdev,
7965 		MMU_BUSY,
7966 		status,
7967 		!(status & 0x80000000),
7968 		1000,
7969 		timeout_usec);
7970 
7971 	if (rc) {
7972 		dev_err(hdev->dev,
7973 			"Timeout during MMU hop0 config of asid %d\n", asid);
7974 		return rc;
7975 	}
7976 
7977 	return 0;
7978 }
7979 
7980 static int gaudi_send_heartbeat(struct hl_device *hdev)
7981 {
7982 	struct gaudi_device *gaudi = hdev->asic_specific;
7983 
7984 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7985 		return 0;
7986 
7987 	return hl_fw_send_heartbeat(hdev);
7988 }
7989 
7990 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7991 {
7992 	struct gaudi_device *gaudi = hdev->asic_specific;
7993 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7994 	int rc;
7995 
7996 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7997 		return 0;
7998 
7999 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8000 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8001 					mmCPU_BOOT_ERR1);
8002 	if (rc)
8003 		return rc;
8004 
8005 	if (!strlen(prop->cpucp_info.card_name))
8006 		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8007 				CARD_NAME_MAX_LEN);
8008 
8009 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8010 
8011 	set_default_power_values(hdev);
8012 
8013 	return 0;
8014 }
8015 
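/*
 * Check whether all DMA, TPC, MME and NIC engines are idle by sampling
 * their QMAN and engine status registers. Busy engines are marked in
 * @mask_arr (when provided) and a human-readable status table is appended
 * to @e (when provided).
 */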
8016 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8017 		struct engines_data *e)
8018 {
8019 	struct gaudi_device *gaudi = hdev->asic_specific;
8020 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8021 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8022 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8023 	unsigned long *mask = (unsigned long *)mask_arr;
8024 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8025 	bool is_idle = true, is_eng_idle, is_slave;
8026 	u64 offset;
8027 	int i, dma_id, port;
8028 
8029 	if (e)
8030 		hl_engine_data_sprintf(e,
8031 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8032 			"---  -------  ------------  ----------  -------------\n");
8033 
8034 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8035 		dma_id = gaudi_dma_assignment[i];
8036 		offset = dma_id * DMA_QMAN_OFFSET;
8037 
8038 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8039 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8040 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8041 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8042 				IS_DMA_IDLE(dma_core_sts0);
8043 		is_idle &= is_eng_idle;
8044 
8045 		if (mask && !is_eng_idle)
8046 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8047 		if (e)
8048 			hl_engine_data_sprintf(e, fmt, dma_id,
8049 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8050 				qm_cgm_sts, dma_core_sts0);
8051 	}
8052 
8053 	if (e)
8054 		hl_engine_data_sprintf(e,
8055 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8056 			"---  -------  ------------  ----------  ----------\n");
8057 
8058 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8059 		offset = i * TPC_QMAN_OFFSET;
8060 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8061 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8062 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8063 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8064 				IS_TPC_IDLE(tpc_cfg_sts);
8065 		is_idle &= is_eng_idle;
8066 
8067 		if (mask && !is_eng_idle)
8068 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8069 		if (e)
8070 			hl_engine_data_sprintf(e, fmt, i,
8071 				is_eng_idle ? "Y" : "N",
8072 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8073 	}
8074 
8075 	if (e)
8076 		hl_engine_data_sprintf(e,
8077 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8078 			"---  -------  ------------  ----------  -----------\n");
8079 
8080 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8081 		offset = i * MME_QMAN_OFFSET;
8082 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8083 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8084 
8085 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8086 		is_slave = i % 2;
8087 		if (!is_slave) {
8088 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8089 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8090 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8091 		}
8092 
8093 		is_idle &= is_eng_idle;
8094 
8095 		if (mask && !is_eng_idle)
8096 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8097 		if (e) {
8098 			if (!is_slave)
8099 				hl_engine_data_sprintf(e, fmt, i,
8100 					is_eng_idle ? "Y" : "N",
8101 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8102 			else
8103 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8104 					is_eng_idle ? "Y" : "N", "-",
8105 					"-", mme_arch_sts);
8106 		}
8107 	}
8108 
8109 	if (e)
8110 		hl_engine_data_sprintf(e,
8111 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8112 				"---  -------  ------------  ----------\n");
8113 
8114 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8115 		offset = i * NIC_MACRO_QMAN_OFFSET;
8116 		port = 2 * i;
8117 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8118 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8119 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8120 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8121 			is_idle &= is_eng_idle;
8122 
8123 			if (mask && !is_eng_idle)
8124 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8125 			if (e)
8126 				hl_engine_data_sprintf(e, nic_fmt, port,
8127 						is_eng_idle ? "Y" : "N",
8128 						qm_glbl_sts0, qm_cgm_sts);
8129 		}
8130 
8131 		port = 2 * i + 1;
8132 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8133 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8134 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8135 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8136 			is_idle &= is_eng_idle;
8137 
8138 			if (mask && !is_eng_idle)
8139 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8140 			if (e)
8141 				hl_engine_data_sprintf(e, nic_fmt, port,
8142 						is_eng_idle ? "Y" : "N",
8143 						qm_glbl_sts0, qm_cgm_sts);
8144 		}
8145 	}
8146 
8147 	if (e)
8148 		hl_engine_data_sprintf(e, "\n");
8149 
8150 	return is_idle;
8151 }
8152 
8153 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8154 	__acquires(&gaudi->hw_queues_lock)
8155 {
8156 	struct gaudi_device *gaudi = hdev->asic_specific;
8157 
8158 	spin_lock(&gaudi->hw_queues_lock);
8159 }
8160 
8161 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8162 	__releases(&gaudi->hw_queues_lock)
8163 {
8164 	struct gaudi_device *gaudi = hdev->asic_specific;
8165 
8166 	spin_unlock(&gaudi->hw_queues_lock);
8167 }
8168 
8169 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8170 {
8171 	return hdev->pdev->device;
8172 }
8173 
8174 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8175 				size_t max_size)
8176 {
8177 	struct gaudi_device *gaudi = hdev->asic_specific;
8178 
8179 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8180 		return 0;
8181 
8182 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8183 }
8184 
8185 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8186 {
8187 	struct gaudi_device *gaudi = hdev->asic_specific;
8188 
8189 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8190 		return 0;
8191 
8192 	return hl_fw_get_monitor_dump(hdev, data);
8193 }
8194 
8195 /*
8196  * This function should be used only during initialization and/or after a
8197  * reset, when there are no active users.
8198  */
8199 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8200 {
8201 	u64 kernel_timeout;
8202 	u32 status, offset;
8203 	int rc;
8204 
8205 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8206 
8207 	if (hdev->pldm)
8208 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8209 	else
8210 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8211 
8212 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8213 			lower_32_bits(tpc_kernel));
8214 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8215 			upper_32_bits(tpc_kernel));
8216 
8217 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8218 			lower_32_bits(tpc_kernel));
8219 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8220 			upper_32_bits(tpc_kernel));
8221 	/* set a valid LUT pointer, content is of no significance */
8222 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8223 			lower_32_bits(tpc_kernel));
8224 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8225 			upper_32_bits(tpc_kernel));
8226 
8227 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8228 			lower_32_bits(CFG_BASE +
8229 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8230 
8231 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8232 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8233 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8234 	/* wait a bit for the engine to start executing */
8235 	usleep_range(1000, 1500);
8236 
8237 	/* wait until engine has finished executing */
8238 	rc = hl_poll_timeout(
8239 		hdev,
8240 		mmTPC0_CFG_STATUS + offset,
8241 		status,
8242 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8243 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8244 		1000,
8245 		kernel_timeout);
8246 
8247 	if (rc) {
8248 		dev_err(hdev->dev,
8249 			"Timeout while waiting for TPC%d icache prefetch\n",
8250 			tpc_id);
8251 		return -EIO;
8252 	}
8253 
8254 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8255 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8256 
8257 	/* wait a bit for the engine to start executing */
8258 	usleep_range(1000, 1500);
8259 
8260 	/* wait until engine has finished executing */
8261 	rc = hl_poll_timeout(
8262 		hdev,
8263 		mmTPC0_CFG_STATUS + offset,
8264 		status,
8265 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8266 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8267 		1000,
8268 		kernel_timeout);
8269 
8270 	if (rc) {
8271 		dev_err(hdev->dev,
8272 			"Timeout while waiting for TPC%d vector pipe\n",
8273 			tpc_id);
8274 		return -EIO;
8275 	}
8276 
8277 	rc = hl_poll_timeout(
8278 		hdev,
8279 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8280 		status,
8281 		(status == 0),
8282 		1000,
8283 		kernel_timeout);
8284 
8285 	if (rc) {
8286 		dev_err(hdev->dev,
8287 			"Timeout while waiting for TPC%d kernel to execute\n",
8288 			tpc_id);
8289 		return -EIO;
8290 	}
8291 
8292 	return 0;
8293 }
8294 
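/*
 * Set up the internal CB pool for the given context: allocate a host
 * coherent buffer, wrap it in a gen_pool whose minimal allocation order
 * fits one collective-wait CB (five MSG_SHORT packets plus a FENCE
 * packet), reserve a host VA block and map it through the MMU.
 */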
8295 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8296 		struct hl_ctx *ctx)
8297 {
8298 	struct gaudi_device *gaudi = hdev->asic_specific;
8299 	int min_alloc_order, rc, collective_cb_size;
8300 
8301 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8302 		return 0;
8303 
8304 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8305 							HOST_SPACE_INTERNAL_CB_SZ,
8306 							&hdev->internal_cb_pool_dma_addr,
8307 							GFP_KERNEL | __GFP_ZERO);
8308 
8309 	if (!hdev->internal_cb_pool_virt_addr)
8310 		return -ENOMEM;
8311 
8312 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8313 			sizeof(struct packet_fence);
8314 	min_alloc_order = ilog2(collective_cb_size);
8315 
8316 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8317 	if (!hdev->internal_cb_pool) {
8318 		dev_err(hdev->dev,
8319 			"Failed to create internal CB pool\n");
8320 		rc = -ENOMEM;
8321 		goto free_internal_cb_pool;
8322 	}
8323 
8324 	rc = gen_pool_add(hdev->internal_cb_pool,
8325 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8326 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8327 	if (rc) {
8328 		dev_err(hdev->dev,
8329 			"Failed to add memory to internal CB pool\n");
8330 		rc = -EFAULT;
8331 		goto destroy_internal_cb_pool;
8332 	}
8333 
8334 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8335 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8336 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8337 
8338 	if (!hdev->internal_cb_va_base) {
8339 		rc = -ENOMEM;
8340 		goto destroy_internal_cb_pool;
8341 	}
8342 
8343 	mutex_lock(&hdev->mmu_lock);
8344 
8345 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8346 			hdev->internal_cb_pool_dma_addr,
8347 			HOST_SPACE_INTERNAL_CB_SZ);
8348 	if (rc)
8349 		goto unreserve_internal_cb_pool;
8350 
8351 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8352 	if (rc)
8353 		goto unmap_internal_cb_pool;
8354 
8355 	mutex_unlock(&hdev->mmu_lock);
8356 
8357 	return 0;
8358 
8359 unmap_internal_cb_pool:
8360 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8361 			HOST_SPACE_INTERNAL_CB_SZ);
8362 unreserve_internal_cb_pool:
8363 	mutex_unlock(&hdev->mmu_lock);
8364 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8365 			HOST_SPACE_INTERNAL_CB_SZ);
8366 destroy_internal_cb_pool:
8367 	gen_pool_destroy(hdev->internal_cb_pool);
8368 free_internal_cb_pool:
8369 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8370 					hdev->internal_cb_pool_dma_addr);
8371 
8372 	return rc;
8373 }
8374 
8375 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8376 		struct hl_ctx *ctx)
8377 {
8378 	struct gaudi_device *gaudi = hdev->asic_specific;
8379 
8380 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8381 		return;
8382 
8383 	mutex_lock(&hdev->mmu_lock);
8384 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8385 			HOST_SPACE_INTERNAL_CB_SZ);
8386 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8387 			HOST_SPACE_INTERNAL_CB_SZ);
8388 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8389 	mutex_unlock(&hdev->mmu_lock);
8390 
8391 	gen_pool_destroy(hdev->internal_cb_pool);
8392 
8393 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8394 					hdev->internal_cb_pool_dma_addr);
8395 }
8396 
8397 static int gaudi_ctx_init(struct hl_ctx *ctx)
8398 {
8399 	int rc;
8400 
8401 	if (ctx->asid == HL_KERNEL_ASID_ID)
8402 		return 0;
8403 
8404 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8405 	if (rc)
8406 		return rc;
8407 
8408 	rc = gaudi_restore_user_registers(ctx->hdev);
8409 	if (rc)
8410 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8411 
8412 	return rc;
8413 }
8414 
8415 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8416 {
8417 	if (ctx->asid == HL_KERNEL_ASID_ID)
8418 		return;
8419 
8420 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8421 }
8422 
8423 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8424 {
8425 	return 0;
8426 }
8427 
8428 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8429 {
8430 	return gaudi_cq_assignment[cq_idx];
8431 }
8432 
8433 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8434 {
8435 	return sizeof(struct packet_msg_short) +
8436 			sizeof(struct packet_msg_prot) * 2;
8437 }
8438 
8439 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8440 {
8441 	return sizeof(struct packet_msg_short) * 4 +
8442 			sizeof(struct packet_fence) +
8443 			sizeof(struct packet_msg_prot) * 2;
8444 }
8445 
8446 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8447 {
8448 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8449 }
8450 
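/*
 * Append a signal packet to the CB at offset @size: a single MSG_SHORT
 * that increments sync object @sob_id by one (ADD mode, W_S SOB base).
 * Returns the updated CB size.
 */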
8451 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8452 				u32 size, bool eb)
8453 {
8454 	struct hl_cb *cb = (struct hl_cb *) data;
8455 	struct packet_msg_short *pkt;
8456 	u32 value, ctl, pkt_size = sizeof(*pkt);
8457 
8458 	pkt = cb->kernel_address + size;
8459 	memset(pkt, 0, pkt_size);
8460 
8461 	/* Inc by 1, Mode ADD */
8462 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8463 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8464 
8465 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8466 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8467 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8468 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8469 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8470 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8471 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8472 
8473 	pkt->value = cpu_to_le32(value);
8474 	pkt->ctl = cpu_to_le32(ctl);
8475 
8476 	return size + pkt_size;
8477 }
8478 
8479 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8480 					u16 addr)
8481 {
8482 	u32 ctl, pkt_size = sizeof(*pkt);
8483 
8484 	memset(pkt, 0, pkt_size);
8485 
8486 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8487 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8488 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8489 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8490 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8491 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* MB is set only on the last packet */
8492 
8493 	pkt->value = cpu_to_le32(value);
8494 	pkt->ctl = cpu_to_le32(ctl);
8495 
8496 	return pkt_size;
8497 }
8498 
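/*
 * Build the MSG_SHORT packet that arms monitor @mon_id: bind it to the
 * sync object group of @sob_base with the given mask, so that it fires
 * once the monitored sync objects reach a value greater than or equal to
 * @sob_val. Returns the packet size, or 0 on an invalid SOB mask.
 */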
8499 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8500 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8501 		u16 sob_val, u16 mon_id)
8502 {
8503 	u64 monitor_base;
8504 	u32 ctl, value, pkt_size = sizeof(*pkt);
8505 	u16 msg_addr_offset;
8506 	u8 mask;
8507 
8508 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8509 		dev_err(hdev->dev,
8510 			"sob_base %u (mask %#x) is not valid\n",
8511 			sob_base, sob_mask);
8512 		return 0;
8513 	}
8514 
8515 	/*
8516 	 * monitor_base should be the content of the base0 address registers,
8517 	 * so it will be added to the msg short offsets
8518 	 */
8519 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8520 
8521 	msg_addr_offset =
8522 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8523 				monitor_base;
8524 
8525 	memset(pkt, 0, pkt_size);
8526 
8527 	/* Monitor config packet: bind the monitor to a sync object */
8528 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8529 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8530 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8531 			0); /* GREATER OR EQUAL */
8532 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8533 
8534 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8535 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8536 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8537 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8538 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8539 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8540 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8541 
8542 	pkt->value = cpu_to_le32(value);
8543 	pkt->ctl = cpu_to_le32(ctl);
8544 
8545 	return pkt_size;
8546 }
8547 
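/*
 * Build a FENCE packet that waits on fence counter 2 to reach a target
 * value of 1 and then decrements it by 1.
 */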
8548 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8549 {
8550 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8551 
8552 	memset(pkt, 0, pkt_size);
8553 
8554 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8555 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8556 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8557 
8558 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8559 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8560 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8561 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8562 
8563 	pkt->cfg = cpu_to_le32(cfg);
8564 	pkt->ctl = cpu_to_le32(ctl);
8565 
8566 	return pkt_size;
8567 }
8568 
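/*
 * Translate a queue ID to the CFG-space address of its CP_FENCE2_RDATA
 * register, which is used as the monitor payload target in the wait CB
 * flow. Queue IDs without such a mapping return -EINVAL.
 */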
8569 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8570 {
8571 	u32 offset, nic_index;
8572 
8573 	switch (queue_id) {
8574 	case GAUDI_QUEUE_ID_DMA_0_0:
8575 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8576 		break;
8577 	case GAUDI_QUEUE_ID_DMA_0_1:
8578 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8579 		break;
8580 	case GAUDI_QUEUE_ID_DMA_0_2:
8581 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8582 		break;
8583 	case GAUDI_QUEUE_ID_DMA_0_3:
8584 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8585 		break;
8586 	case GAUDI_QUEUE_ID_DMA_1_0:
8587 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8588 		break;
8589 	case GAUDI_QUEUE_ID_DMA_1_1:
8590 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8591 		break;
8592 	case GAUDI_QUEUE_ID_DMA_1_2:
8593 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8594 		break;
8595 	case GAUDI_QUEUE_ID_DMA_1_3:
8596 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8597 		break;
8598 	case GAUDI_QUEUE_ID_DMA_5_0:
8599 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8600 		break;
8601 	case GAUDI_QUEUE_ID_DMA_5_1:
8602 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8603 		break;
8604 	case GAUDI_QUEUE_ID_DMA_5_2:
8605 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8606 		break;
8607 	case GAUDI_QUEUE_ID_DMA_5_3:
8608 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8609 		break;
8610 	case GAUDI_QUEUE_ID_TPC_7_0:
8611 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8612 		break;
8613 	case GAUDI_QUEUE_ID_TPC_7_1:
8614 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8615 		break;
8616 	case GAUDI_QUEUE_ID_TPC_7_2:
8617 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8618 		break;
8619 	case GAUDI_QUEUE_ID_TPC_7_3:
8620 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8621 		break;
8622 	case GAUDI_QUEUE_ID_NIC_0_0:
8623 	case GAUDI_QUEUE_ID_NIC_1_0:
8624 	case GAUDI_QUEUE_ID_NIC_2_0:
8625 	case GAUDI_QUEUE_ID_NIC_3_0:
8626 	case GAUDI_QUEUE_ID_NIC_4_0:
8627 	case GAUDI_QUEUE_ID_NIC_5_0:
8628 	case GAUDI_QUEUE_ID_NIC_6_0:
8629 	case GAUDI_QUEUE_ID_NIC_7_0:
8630 	case GAUDI_QUEUE_ID_NIC_8_0:
8631 	case GAUDI_QUEUE_ID_NIC_9_0:
8632 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8633 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8634 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8635 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8636 		break;
8637 	case GAUDI_QUEUE_ID_NIC_0_1:
8638 	case GAUDI_QUEUE_ID_NIC_1_1:
8639 	case GAUDI_QUEUE_ID_NIC_2_1:
8640 	case GAUDI_QUEUE_ID_NIC_3_1:
8641 	case GAUDI_QUEUE_ID_NIC_4_1:
8642 	case GAUDI_QUEUE_ID_NIC_5_1:
8643 	case GAUDI_QUEUE_ID_NIC_6_1:
8644 	case GAUDI_QUEUE_ID_NIC_7_1:
8645 	case GAUDI_QUEUE_ID_NIC_8_1:
8646 	case GAUDI_QUEUE_ID_NIC_9_1:
8647 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8648 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8649 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8650 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8651 		break;
8652 	case GAUDI_QUEUE_ID_NIC_0_2:
8653 	case GAUDI_QUEUE_ID_NIC_1_2:
8654 	case GAUDI_QUEUE_ID_NIC_2_2:
8655 	case GAUDI_QUEUE_ID_NIC_3_2:
8656 	case GAUDI_QUEUE_ID_NIC_4_2:
8657 	case GAUDI_QUEUE_ID_NIC_5_2:
8658 	case GAUDI_QUEUE_ID_NIC_6_2:
8659 	case GAUDI_QUEUE_ID_NIC_7_2:
8660 	case GAUDI_QUEUE_ID_NIC_8_2:
8661 	case GAUDI_QUEUE_ID_NIC_9_2:
8662 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8663 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8664 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8665 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8666 		break;
8667 	case GAUDI_QUEUE_ID_NIC_0_3:
8668 	case GAUDI_QUEUE_ID_NIC_1_3:
8669 	case GAUDI_QUEUE_ID_NIC_2_3:
8670 	case GAUDI_QUEUE_ID_NIC_3_3:
8671 	case GAUDI_QUEUE_ID_NIC_4_3:
8672 	case GAUDI_QUEUE_ID_NIC_5_3:
8673 	case GAUDI_QUEUE_ID_NIC_6_3:
8674 	case GAUDI_QUEUE_ID_NIC_7_3:
8675 	case GAUDI_QUEUE_ID_NIC_8_3:
8676 	case GAUDI_QUEUE_ID_NIC_9_3:
8677 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8678 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8679 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8680 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8681 		break;
8682 	default:
8683 		return -EINVAL;
8684 	}
8685 
8686 	*addr = CFG_BASE + offset;
8687 
8688 	return 0;
8689 }
8690 
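/*
 * Emit the three monitor configuration MSG_SHORT packets for @mon_id:
 * the low 32 bits of the payload address, the high 32 bits of the payload
 * address, and the payload data (the value 1 written to @fence_addr when
 * the monitor fires). Returns the total size of the added packets.
 */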
8691 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8692 {
8693 	u64 monitor_base;
8694 	u32 size = 0;
8695 	u16 msg_addr_offset;
8696 
8697 	/*
8698 	 * monitor_base should be the content of the base0 address registers,
8699 	 * so it will be added to the msg short offsets
8700 	 */
8701 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8702 
8703 	/* First monitor config packet: low address of the sync */
8704 	msg_addr_offset =
8705 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8706 				monitor_base;
8707 
8708 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8709 					msg_addr_offset);
8710 
8711 	/* Second monitor config packet: high address of the sync */
8712 	msg_addr_offset =
8713 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8714 				monitor_base;
8715 
8716 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8717 					msg_addr_offset);
8718 
8719 	/*
8720 	 * Third monitor config packet: the payload, i.e. what to write when the
8721 	 * sync triggers
8722 	 */
8723 	msg_addr_offset =
8724 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8725 				monitor_base;
8726 
8727 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8728 
8729 	return size;
8730 }
8731 
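/*
 * Build a wait CB: three monitor configuration packets, an arm-monitor
 * packet and a FENCE packet. The monitor payload targets the fence
 * register of the destination queue, so the FENCE packet blocks the queue
 * until the monitored sync objects reach the requested value.
 */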
8732 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8733 				struct hl_gen_wait_properties *prop)
8734 {
8735 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8736 	void *buf = cb->kernel_address;
8737 	u64 fence_addr = 0;
8738 	u32 size = prop->size;
8739 
8740 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8741 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8742 				prop->q_idx);
8743 		return 0;
8744 	}
8745 
8746 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8747 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8748 			prop->sob_mask, prop->sob_val, prop->mon_id);
8749 	size += gaudi_add_fence_pkt(buf + size);
8750 
8751 	return size;
8752 }
8753 
8754 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8755 {
8756 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8757 
8758 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8759 		hw_sob->sob_id);
8760 
8761 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8762 			hw_sob->sob_id * 4, 0);
8763 
8764 	kref_init(&hw_sob->kref);
8765 }
8766 
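/* Read the 64-bit PSOC timestamp counter from its two 32-bit halves */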
8767 static u64 gaudi_get_device_time(struct hl_device *hdev)
8768 {
8769 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8770 
8771 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8772 }
8773 
8774 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8775 				u32 *block_size, u32 *block_id)
8776 {
8777 	return -EPERM;
8778 }
8779 
8780 static int gaudi_block_mmap(struct hl_device *hdev,
8781 				struct vm_area_struct *vma,
8782 				u32 block_id, u32 block_size)
8783 {
8784 	return -EPERM;
8785 }
8786 
8787 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8788 {
8789 	struct cpu_dyn_regs *dyn_regs =
8790 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8791 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8792 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8793 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8794 
8795 	WREG32(irq_handler_offset,
8796 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8797 }
8798 
8799 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8800 {
8801 	return -EINVAL;
8802 }
8803 
8804 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8805 {
8806 	switch (pll_idx) {
8807 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8808 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8809 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8810 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8811 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8812 	case HL_GAUDI_MME_PLL: return MME_PLL;
8813 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8814 	case HL_GAUDI_IF_PLL: return IF_PLL;
8815 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8816 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8817 	default: return -EINVAL;
8818 	}
8819 }
8820 
8821 static int gaudi_add_sync_to_engine_map_entry(
8822 	struct hl_sync_to_engine_map *map, u32 reg_value,
8823 	enum hl_sync_engine_type engine_type, u32 engine_id)
8824 {
8825 	struct hl_sync_to_engine_map_entry *entry;
8826 
8827 	/* The register value represents a partial address of the sync
8828 	 * object and is used as a unique identifier. For that, the lower
8829 	 * 32 bits of the CFG base are subtracted from the value.
8830 	 */
8831 	if (reg_value == 0 || reg_value == 0xffffffff)
8832 		return 0;
8833 	reg_value -= lower_32_bits(CFG_BASE);
8834 
8835 	/* create a new hash entry */
8836 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8837 	if (!entry)
8838 		return -ENOMEM;
8839 	entry->engine_type = engine_type;
8840 	entry->engine_id = engine_id;
8841 	entry->sync_id = reg_value;
8842 	hash_add(map->tb, &entry->node, reg_value);
8843 
8844 	return 0;
8845 }
8846 
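/*
 * Build the sync-object-to-engine map used by the state dump: read the
 * sync object configuration register of every TPC, MME and DMA engine and
 * add a map entry for each valid value.
 */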
8847 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8848 				struct hl_sync_to_engine_map *map)
8849 {
8850 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8851 	int i, j, rc;
8852 	u32 reg_value;
8853 
8854 	/* Iterate over TPC engines */
8855 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8856 
8857 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8858 					sds->props[SP_NEXT_TPC] * i);
8859 
8860 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8861 							ENGINE_TPC, i);
8862 		if (rc)
8863 			goto free_sync_to_engine_map;
8864 	}
8865 
8866 	/* Iterate over MME engines */
8867 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8868 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8869 
8870 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8871 						sds->props[SP_NEXT_MME] * i +
8872 						j * sizeof(u32));
8873 
8874 			rc = gaudi_add_sync_to_engine_map_entry(
8875 				map, reg_value, ENGINE_MME,
8876 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8877 			if (rc)
8878 				goto free_sync_to_engine_map;
8879 		}
8880 	}
8881 
8882 	/* Iterate over DMA engines */
8883 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8884 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8885 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8886 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8887 							ENGINE_DMA, i);
8888 		if (rc)
8889 			goto free_sync_to_engine_map;
8890 	}
8891 
8892 	return 0;
8893 
8894 free_sync_to_engine_map:
8895 	hl_state_dump_free_sync_to_engine_map(map);
8896 
8897 	return rc;
8898 }
8899 
8900 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8901 {
8902 	return FIELD_GET(
8903 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8904 		mon->status);
8905 }
8906 
8907 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8908 {
8909 	const size_t max_write = 10;
8910 	u32 gid, mask, sob;
8911 	int i, offset;
8912 
8913 	/* Each monitored sync object ID is calculated as:
8914 	 * (8 * group_id) + the index of every cleared bit in the mask
8915 	 */
8916 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8917 			mon->arm_data);
8918 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8919 			mon->arm_data);
8920 
8921 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8922 		max_write; mask >>= 1, i++) {
8923 		if (!(mask & 1)) {
8924 			sob = gid * MONITOR_MAX_SOBS + i;
8925 
8926 			if (offset > 0)
8927 				offset += snprintf(sobs + offset, max_write,
8928 							", ");
8929 
8930 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8931 		}
8932 	}
8933 }
8934 
8935 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8936 				struct hl_device *hdev,
8937 				struct hl_mon_state_dump *mon)
8938 {
8939 	const char *name;
8940 	char scratch_buf1[BIN_REG_STRING_SIZE],
8941 		scratch_buf2[BIN_REG_STRING_SIZE];
8942 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8943 
8944 	name = hl_state_dump_get_monitor_name(hdev, mon);
8945 	if (!name)
8946 		name = "";
8947 
8948 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8949 
8950 	return hl_snprintf_resize(
8951 		buf, size, offset,
8952 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8953 		mon->id, name,
8954 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8955 				mon->arm_data),
8956 		hl_format_as_binary(
8957 			scratch_buf1, sizeof(scratch_buf1),
8958 			FIELD_GET(
8959 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8960 				mon->arm_data)),
8961 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8962 				mon->arm_data),
8963 		mon->wr_data,
8964 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8965 		hl_format_as_binary(
8966 			scratch_buf2, sizeof(scratch_buf2),
8967 			FIELD_GET(
8968 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8969 				mon->status)),
8970 		monitored_sobs);
8971 }
8972 
8973 
8974 static int gaudi_print_fences_single_engine(
8975 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8976 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8977 	size_t *size, size_t *offset)
8978 {
8979 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8980 	int rc = -ENOMEM, i;
8981 	u32 *statuses, *fences;
8982 
8983 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8984 			sizeof(*statuses), GFP_KERNEL);
8985 	if (!statuses)
8986 		goto out;
8987 
8988 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8989 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
8990 			 sizeof(*fences), GFP_KERNEL);
8991 	if (!fences)
8992 		goto free_status;
8993 
8994 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8995 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8996 
8997 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8998 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8999 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9000 
9001 	/* The actual print */
9002 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9003 		u32 fence_id;
9004 		u64 fence_cnt, fence_rdata;
9005 		const char *engine_name;
9006 
9007 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9008 			statuses[i]))
9009 			continue;
9010 
9011 		fence_id =
9012 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9013 		fence_cnt = base_offset + CFG_BASE +
9014 			sizeof(u32) *
9015 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9016 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9017 				sds->props[SP_FENCE0_RDATA_OFFSET];
9018 		engine_name = hl_sync_engine_to_string(engine_type);
9019 
9020 		rc = hl_snprintf_resize(
9021 			buf, size, offset,
9022 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9023 			engine_name, engine_id,
9024 			i, fence_id,
9025 			fence_cnt, engine_name, engine_id, fence_id, i,
9026 			fence_rdata, engine_name, engine_id, fence_id, i,
9027 			fences[fence_id],
9028 			statuses[i]);
9029 		if (rc)
9030 			goto free_fences;
9031 	}
9032 
9033 	rc = 0;
9034 
9035 free_fences:
9036 	kfree(fences);
9037 free_status:
9038 	kfree(statuses);
9039 out:
9040 	return rc;
9041 }
9042 
9043 
9044 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9045 	.monitor_valid = gaudi_monitor_valid,
9046 	.print_single_monitor = gaudi_print_single_monitor,
9047 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9048 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9049 };
9050 
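/*
 * Register the Gaudi-specific state dump data: sync object and monitor
 * ID-to-name hash tables, the properties array, the sync manager names
 * and the dump callbacks.
 */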
9051 static void gaudi_state_dump_init(struct hl_device *hdev)
9052 {
9053 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9054 	int i;
9055 
9056 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9057 		hash_add(sds->so_id_to_str_tb,
9058 			&gaudi_so_id_to_str[i].node,
9059 			gaudi_so_id_to_str[i].id);
9060 
9061 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9062 		hash_add(sds->monitor_id_to_str_tb,
9063 			&gaudi_monitor_id_to_str[i].node,
9064 			gaudi_monitor_id_to_str[i].id);
9065 
9066 	sds->props = gaudi_state_dump_specs_props;
9067 
9068 	sds->sync_namager_names = gaudi_sync_manager_names;
9069 
9070 	sds->funcs = gaudi_state_dump_funcs;
9071 }
9072 
9073 static u32 *gaudi_get_stream_master_qid_arr(void)
9074 {
9075 	return gaudi_stream_master;
9076 }
9077 
9078 static int gaudi_set_dram_properties(struct hl_device *hdev)
9079 {
9080 	return 0;
9081 }
9082 
9083 static int gaudi_set_binning_masks(struct hl_device *hdev)
9084 {
9085 	return 0;
9086 }
9087 
9088 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9089 {
9090 }
9091 
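/* sysfs attribute exposing the Infineon (VRM) version reported in the CPUCP info */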
9092 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9093 {
9094 	struct hl_device *hdev = dev_get_drvdata(dev);
9095 	struct cpucp_info *cpucp_info;
9096 
9097 	cpucp_info = &hdev->asic_prop.cpucp_info;
9098 
9099 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9100 }
9101 
9102 static DEVICE_ATTR_RO(infineon_ver);
9103 
9104 static struct attribute *gaudi_vrm_dev_attrs[] = {
9105 	&dev_attr_infineon_ver.attr,
9106 	NULL,
9107 };
9108 
9109 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9110 					struct attribute_group *dev_vrm_attr_grp)
9111 {
9112 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9113 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9114 }
9115 
9116 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9117 {
9118 	return 0;
9119 }
9120 
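/* ASIC function table through which the common habanalabs code drives Gaudi */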
9121 static const struct hl_asic_funcs gaudi_funcs = {
9122 	.early_init = gaudi_early_init,
9123 	.early_fini = gaudi_early_fini,
9124 	.late_init = gaudi_late_init,
9125 	.late_fini = gaudi_late_fini,
9126 	.sw_init = gaudi_sw_init,
9127 	.sw_fini = gaudi_sw_fini,
9128 	.hw_init = gaudi_hw_init,
9129 	.hw_fini = gaudi_hw_fini,
9130 	.halt_engines = gaudi_halt_engines,
9131 	.suspend = gaudi_suspend,
9132 	.resume = gaudi_resume,
9133 	.mmap = gaudi_mmap,
9134 	.ring_doorbell = gaudi_ring_doorbell,
9135 	.pqe_write = gaudi_pqe_write,
9136 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9137 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9138 	.scrub_device_mem = gaudi_scrub_device_mem,
9139 	.scrub_device_dram = gaudi_scrub_device_dram,
9140 	.get_int_queue_base = gaudi_get_int_queue_base,
9141 	.test_queues = gaudi_test_queues,
9142 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9143 	.asic_dma_pool_free = gaudi_dma_pool_free,
9144 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9145 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9146 	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9147 	.cs_parser = gaudi_cs_parser,
9148 	.dma_map_sgtable = hl_asic_dma_map_sgtable,
9149 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9150 	.update_eq_ci = gaudi_update_eq_ci,
9151 	.context_switch = gaudi_context_switch,
9152 	.restore_phase_topology = gaudi_restore_phase_topology,
9153 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9154 	.add_device_attr = gaudi_add_device_attr,
9155 	.handle_eqe = gaudi_handle_eqe,
9156 	.get_events_stat = gaudi_get_events_stat,
9157 	.read_pte = gaudi_read_pte,
9158 	.write_pte = gaudi_write_pte,
9159 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9160 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9161 	.mmu_prefetch_cache_range = NULL,
9162 	.send_heartbeat = gaudi_send_heartbeat,
9163 	.debug_coresight = gaudi_debug_coresight,
9164 	.is_device_idle = gaudi_is_device_idle,
9165 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9166 	.hw_queues_lock = gaudi_hw_queues_lock,
9167 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9168 	.get_pci_id = gaudi_get_pci_id,
9169 	.get_eeprom_data = gaudi_get_eeprom_data,
9170 	.get_monitor_dump = gaudi_get_monitor_dump,
9171 	.send_cpu_message = gaudi_send_cpu_message,
9172 	.pci_bars_map = gaudi_pci_bars_map,
9173 	.init_iatu = gaudi_init_iatu,
9174 	.rreg = hl_rreg,
9175 	.wreg = hl_wreg,
9176 	.halt_coresight = gaudi_halt_coresight,
9177 	.ctx_init = gaudi_ctx_init,
9178 	.ctx_fini = gaudi_ctx_fini,
9179 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9180 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9181 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9182 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9183 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9184 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9185 	.gen_signal_cb = gaudi_gen_signal_cb,
9186 	.gen_wait_cb = gaudi_gen_wait_cb,
9187 	.reset_sob = gaudi_reset_sob,
9188 	.reset_sob_group = gaudi_reset_sob_group,
9189 	.get_device_time = gaudi_get_device_time,
9190 	.pb_print_security_errors = NULL,
9191 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9192 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9193 	.get_dec_base_addr = NULL,
9194 	.scramble_addr = hl_mmu_scramble_addr,
9195 	.descramble_addr = hl_mmu_descramble_addr,
9196 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9197 	.get_hw_block_id = gaudi_get_hw_block_id,
9198 	.hw_block_mmap = gaudi_block_mmap,
9199 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9200 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9201 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9202 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9203 	.init_firmware_loader = gaudi_init_firmware_loader,
9204 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9205 	.state_dump_init = gaudi_state_dump_init,
9206 	.get_sob_addr = gaudi_get_sob_addr,
9207 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9208 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9209 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9210 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9211 	.access_dev_mem = hl_access_dev_mem,
9212 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9213 	.send_device_activity = gaudi_send_device_activity,
9214 	.set_dram_properties = gaudi_set_dram_properties,
9215 	.set_binning_masks = gaudi_set_binning_masks,
9216 };
9217 
9218 /**
9219  * gaudi_set_asic_funcs - set GAUDI function pointers
9220  *
9221  * @hdev: pointer to hl_device structure
9222  *
9223  */
9224 void gaudi_set_asic_funcs(struct hl_device *hdev)
9225 {
9226 	hdev->asic_funcs = &gaudi_funcs;
9227 }
9228