xref: /linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision a3a02a52bcfcbcc4a637d4b68bf1bc391c9fad02)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
39  * QMAN DMA channels 0,1 (PCI DMAN):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69 
70 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
71 
72 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
76 
77 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
86 
87 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
88 
89 #define GAUDI_MAX_STRING_LEN		20
90 
91 #define GAUDI_CB_POOL_CB_CNT		512
92 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
93 
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
95 
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
97 
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
99 
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
101 
102 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
103 
104 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
105 
106 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
107 
108 #define MONITOR_SOB_STRING_SIZE		256
109 
110 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
111 	GAUDI_QUEUE_ID_DMA_0_0,
112 	GAUDI_QUEUE_ID_DMA_0_1,
113 	GAUDI_QUEUE_ID_DMA_0_2,
114 	GAUDI_QUEUE_ID_DMA_0_3,
115 	GAUDI_QUEUE_ID_DMA_1_0,
116 	GAUDI_QUEUE_ID_DMA_1_1,
117 	GAUDI_QUEUE_ID_DMA_1_2,
118 	GAUDI_QUEUE_ID_DMA_1_3
119 };
120 
121 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
122 	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
123 	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
124 	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
125 	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
126 	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
127 	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
128 	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
129 	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
130 };
131 
132 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
133 	[0] = GAUDI_QUEUE_ID_DMA_0_0,
134 	[1] = GAUDI_QUEUE_ID_DMA_0_1,
135 	[2] = GAUDI_QUEUE_ID_DMA_0_2,
136 	[3] = GAUDI_QUEUE_ID_DMA_0_3,
137 	[4] = GAUDI_QUEUE_ID_DMA_1_0,
138 	[5] = GAUDI_QUEUE_ID_DMA_1_1,
139 	[6] = GAUDI_QUEUE_ID_DMA_1_2,
140 	[7] = GAUDI_QUEUE_ID_DMA_1_3,
141 };
142 
143 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
144 	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
145 	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
146 	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
147 	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
148 	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
149 	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
150 	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
151 	[PACKET_FENCE]		= sizeof(struct packet_fence),
152 	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
153 	[PACKET_NOP]		= sizeof(struct packet_nop),
154 	[PACKET_STOP]		= sizeof(struct packet_stop),
155 	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
156 	[PACKET_WAIT]		= sizeof(struct packet_wait),
157 	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
158 };
159 
160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 	switch (id) {
163 	case PACKET_WREG_32:
164 	case PACKET_WREG_BULK:
165 	case PACKET_MSG_LONG:
166 	case PACKET_MSG_SHORT:
167 	case PACKET_CP_DMA:
168 	case PACKET_REPEAT:
169 	case PACKET_MSG_PROT:
170 	case PACKET_FENCE:
171 	case PACKET_LIN_DMA:
172 	case PACKET_NOP:
173 	case PACKET_STOP:
174 	case PACKET_ARB_POINT:
175 	case PACKET_WAIT:
176 	case PACKET_LOAD_AND_EXE:
177 		return true;
178 	default:
179 		return false;
180 	}
181 }
182 
183 static const char * const
184 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
185 	"tpc_address_exceed_slm",
186 	"tpc_div_by_0",
187 	"tpc_spu_mac_overflow",
188 	"tpc_spu_addsub_overflow",
189 	"tpc_spu_abs_overflow",
190 	"tpc_spu_fp_dst_nan_inf",
191 	"tpc_spu_fp_dst_denorm",
192 	"tpc_vpu_mac_overflow",
193 	"tpc_vpu_addsub_overflow",
194 	"tpc_vpu_abs_overflow",
195 	"tpc_vpu_fp_dst_nan_inf",
196 	"tpc_vpu_fp_dst_denorm",
197 	"tpc_assertions",
198 	"tpc_illegal_instruction",
199 	"tpc_pc_wrap_around",
200 	"tpc_qm_sw_err",
201 	"tpc_hbw_rresp_err",
202 	"tpc_hbw_bresp_err",
203 	"tpc_lbw_rresp_err",
204 	"tpc_lbw_bresp_err"
205 };
206 
207 static const char * const
208 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209 	"PQ AXI HBW error",
210 	"CQ AXI HBW error",
211 	"CP AXI HBW error",
212 	"CP error due to undefined OPCODE",
213 	"CP encountered STOP OPCODE",
214 	"CP AXI LBW error",
215 	"CP WRREG32 or WRBULK returned error",
216 	"N/A",
217 	"FENCE 0 inc over max value and clipped",
218 	"FENCE 1 inc over max value and clipped",
219 	"FENCE 2 inc over max value and clipped",
220 	"FENCE 3 inc over max value and clipped",
221 	"FENCE 0 dec under min value and clipped",
222 	"FENCE 1 dec under min value and clipped",
223 	"FENCE 2 dec under min value and clipped",
224 	"FENCE 3 dec under min value and clipped"
225 };
226 
227 static const char * const
228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
229 	"Choice push while full error",
230 	"Choice Q watchdog error",
231 	"MSG AXI LBW returned with error"
232 };
233 
234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
235 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
236 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
237 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
238 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
239 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
240 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
241 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
242 	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
243 	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
244 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
245 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
246 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
247 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
248 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
249 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
250 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
251 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
252 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
253 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
254 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
255 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
256 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
257 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
258 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
259 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
260 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
261 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
262 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
263 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
264 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
265 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
266 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
267 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
268 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
269 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
270 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
271 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
272 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
273 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
274 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
275 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
276 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
277 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
278 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
279 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
280 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
281 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
282 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
283 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
284 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
285 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
286 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
287 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
288 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
289 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
290 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
291 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
292 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
293 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
294 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
295 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
296 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
297 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
298 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
299 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
300 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
301 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
302 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
303 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
304 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
305 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
306 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
307 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
308 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
309 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
310 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
311 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
312 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
313 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
314 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
315 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
316 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
317 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
318 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
319 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
320 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
321 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
322 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
323 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
324 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
325 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
326 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
327 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
328 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
329 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
330 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
331 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
332 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
333 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
334 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
335 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
336 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
337 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
338 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
339 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
340 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
341 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
342 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
343 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
344 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
345 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
346 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
347 	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
348 };
349 
350 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
351 	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352 	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
353 	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354 	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355 	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356 	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
357 	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358 	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359 	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
360 	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
361 	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362 	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363 	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364 	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365 	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366 	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367 	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368 	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369 	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370 	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371 	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372 	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373 	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374 	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375 	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376 	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
377 	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
378 };
379 
380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
381 	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382 	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383 	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384 	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
385 	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
386 	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
387 	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
388 	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
389 	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
390 	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
391 	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
392 };
393 
394 static s64 gaudi_state_dump_specs_props[] = {
395 	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
396 	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
397 	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
398 	[SP_MON_OBJ_WR_ADDR_LOW] =
399 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
400 	[SP_MON_OBJ_WR_ADDR_HIGH] =
401 		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
402 	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
403 	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
404 	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
405 	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
406 	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
407 	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
408 	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
409 	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
410 	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
411 	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
412 	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
413 	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
414 	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
415 	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
416 	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
417 	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
418 	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
419 	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
420 	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
421 	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
422 	[SP_FENCE0_CNT_OFFSET] =
423 		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
424 	[SP_FENCE0_RDATA_OFFSET] =
425 		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
426 	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
427 	[SP_NUM_CORES] = 1,
428 };
429 
430 static const int gaudi_queue_id_to_engine_id[] = {
431 	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
432 	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
433 	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
434 	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
435 	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
436 	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
437 	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
438 	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
439 	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
440 	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
441 	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
442 	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
443 	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
444 	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
445 	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
446 	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
447 	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
448 	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
449 	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
450 	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
451 	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
452 	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
453 	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
454 	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
455 	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
456 	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
457 	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
458 	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
459 	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
460 };
461 
/*
 * NULL-terminated list of sync manager names.
 *
 * Note that the h/w indexes the sync managers in the opposite order to this
 * list, i.e. in h/w SYNC_MGR_W_S is index 0, SYNC_MGR_E_S is index 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	[0] = "SYNC_MGR_E_N",
	[1] = "SYNC_MGR_W_N",
	[2] = "SYNC_MGR_E_S",
	[3] = "SYNC_MGR_W_S",
	[4] = NULL,
};
472 
473 struct ecc_info_extract_params {
474 	u64 block_address;
475 	u32 num_memories;
476 	bool derr;
477 };
478 
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480 								u64 phys_addr);
481 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482 					struct hl_cs_job *job);
483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484 					u32 size, u64 val);
485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486 					u32 num_regs, u32 val);
487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488 				u32 tpc_id);
489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490 static int gaudi_cpucp_info_get(struct hl_device *hdev);
491 static void gaudi_disable_clock_gating(struct hl_device *hdev);
492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494 				u32 size, bool eb);
495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496 				struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 		return HL_COLLECTIVE_MASTER;
502 
503 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 		return HL_COLLECTIVE_SLAVE;
506 
507 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 		return HL_COLLECTIVE_SLAVE;
510 
511 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 		return HL_COLLECTIVE_SLAVE;
514 
515 	return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517 
518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 	struct asic_fixed_properties *prop = &hdev->asic_prop;
521 
522 	if (hdev->card_type == cpucp_card_type_pmc) {
523 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524 
525 		if (prop->fw_security_enabled)
526 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 		else
528 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 	} else {
530 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 	}
533 }
534 
535 static int gaudi_set_fixed_properties(struct hl_device *hdev)
536 {
537 	struct asic_fixed_properties *prop = &hdev->asic_prop;
538 	u32 num_sync_stream_queues = 0;
539 	int i;
540 
541 	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542 	prop->hw_queues_props = kcalloc(prop->max_queues,
543 			sizeof(struct hw_queue_properties),
544 			GFP_KERNEL);
545 
546 	if (!prop->hw_queues_props)
547 		return -ENOMEM;
548 
549 	for (i = 0 ; i < prop->max_queues ; i++) {
550 		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551 			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552 			prop->hw_queues_props[i].driver_only = 0;
553 			prop->hw_queues_props[i].supports_sync_stream = 1;
554 			prop->hw_queues_props[i].cb_alloc_flags =
555 				CB_ALLOC_KERNEL;
556 			num_sync_stream_queues++;
557 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558 			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559 			prop->hw_queues_props[i].driver_only = 1;
560 			prop->hw_queues_props[i].supports_sync_stream = 0;
561 			prop->hw_queues_props[i].cb_alloc_flags =
562 				CB_ALLOC_KERNEL;
563 		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564 			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565 			prop->hw_queues_props[i].driver_only = 0;
566 			prop->hw_queues_props[i].supports_sync_stream = 0;
567 			prop->hw_queues_props[i].cb_alloc_flags =
568 				CB_ALLOC_USER;
569 
570 		}
571 		prop->hw_queues_props[i].collective_mode =
572 						get_collective_mode(hdev, i);
573 	}
574 
575 	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576 	prop->cfg_base_address = CFG_BASE;
577 	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578 	prop->host_base_address = HOST_PHYS_BASE;
579 	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580 	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581 	prop->completion_mode = HL_COMPLETION_MODE_JOB;
582 	prop->collective_first_sob = 0;
583 	prop->collective_first_mon = 0;
584 
585 	/* 2 SOBs per internal queue stream are reserved for collective */
586 	prop->sync_stream_first_sob =
587 			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588 			* QMAN_STREAMS * HL_RSVD_SOBS;
589 
590 	/* 1 monitor per internal queue stream are reserved for collective
591 	 * 2 monitors per external queue stream are reserved for collective
592 	 */
593 	prop->sync_stream_first_mon =
594 			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595 			(NUMBER_OF_EXT_HW_QUEUES * 2);
596 
597 	prop->dram_base_address = DRAM_PHYS_BASE;
598 	prop->dram_size = GAUDI_HBM_SIZE_32GB;
599 	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600 	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601 
602 	prop->sram_base_address = SRAM_BASE_ADDR;
603 	prop->sram_size = SRAM_SIZE;
604 	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605 	prop->sram_user_base_address =
606 			prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607 
608 	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609 	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610 
611 	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612 	if (hdev->pldm)
613 		prop->mmu_pgt_size = 0x800000; /* 8MB */
614 	else
615 		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616 	prop->mmu_pte_size = HL_PTE_SIZE;
617 	prop->dram_page_size = PAGE_SIZE_2MB;
618 	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
619 	prop->dram_supports_virtual_memory = false;
620 
621 	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
622 	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
623 	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
624 	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
625 	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
626 	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
627 	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
628 	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
629 	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
630 	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
631 	prop->pmmu.start_addr = VA_HOST_SPACE_START;
632 	prop->pmmu.end_addr =
633 			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
634 	prop->pmmu.page_size = PAGE_SIZE_4KB;
635 	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
636 	prop->pmmu.last_mask = LAST_MASK;
637 	/* TODO: will be duplicated until implementing per-MMU props */
638 	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
639 	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
640 
641 	/* PMMU and HPMMU are the same except of page size */
642 	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
643 	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
644 
645 	/* shifts and masks are the same in PMMU and DMMU */
646 	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
647 	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
648 	prop->dmmu.end_addr = VA_HOST_SPACE_END;
649 	prop->dmmu.page_size = PAGE_SIZE_2MB;
650 	prop->dmmu.pgt_size = prop->mmu_pgt_size;
651 
652 	prop->cfg_size = CFG_SIZE;
653 	prop->max_asid = MAX_ASID;
654 	prop->num_of_events = GAUDI_EVENT_SIZE;
655 	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
656 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
657 
658 	set_default_power_values(hdev);
659 
660 	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
661 	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
662 
663 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
664 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
665 
666 	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
667 					CARD_NAME_MAX_LEN);
668 
669 	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
670 
671 	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
672 			prop->sync_stream_first_sob +
673 			(num_sync_stream_queues * HL_RSVD_SOBS);
674 	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
675 			prop->sync_stream_first_mon +
676 			(num_sync_stream_queues * HL_RSVD_MONS);
677 
678 	prop->first_available_user_interrupt = USHRT_MAX;
679 	prop->tpc_interrupt_id = USHRT_MAX;
680 
681 	/* single msi */
682 	prop->eq_interrupt_id = 0;
683 
684 	for (i = 0 ; i < HL_MAX_DCORES ; i++)
685 		prop->first_available_cq[i] = USHRT_MAX;
686 
687 	prop->fw_cpu_boot_dev_sts0_valid = false;
688 	prop->fw_cpu_boot_dev_sts1_valid = false;
689 	prop->hard_reset_done_by_fw = false;
690 	prop->gic_interrupts_enable = true;
691 
692 	prop->server_type = HL_SERVER_TYPE_UNKNOWN;
693 
694 	prop->clk_pll_index = HL_GAUDI_MME_PLL;
695 	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
696 
697 	prop->use_get_power_for_reset_history = true;
698 
699 	prop->configurable_stop_on_err = true;
700 
701 	prop->set_max_power_on_device_init = true;
702 
703 	prop->dma_mask = 48;
704 
705 	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
706 
707 	return 0;
708 }
709 
710 static int gaudi_pci_bars_map(struct hl_device *hdev)
711 {
712 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
713 	bool is_wc[3] = {false, false, true};
714 	int rc;
715 
716 	rc = hl_pci_bars_map(hdev, name, is_wc);
717 	if (rc)
718 		return rc;
719 
720 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
722 
723 	return 0;
724 }
725 
726 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727 {
728 	struct gaudi_device *gaudi = hdev->asic_specific;
729 	struct hl_inbound_pci_region pci_region;
730 	u64 old_addr = addr;
731 	int rc;
732 
733 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734 		return old_addr;
735 
736 	if (hdev->asic_prop.iatu_done_by_fw)
737 		return U64_MAX;
738 
739 	/* Inbound Region 2 - Bar 4 - Point to HBM */
740 	pci_region.mode = PCI_BAR_MATCH_MODE;
741 	pci_region.bar = HBM_BAR_ID;
742 	pci_region.addr = addr;
743 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
744 	if (rc)
745 		return U64_MAX;
746 
747 	if (gaudi) {
748 		old_addr = gaudi->hbm_bar_cur_addr;
749 		gaudi->hbm_bar_cur_addr = addr;
750 	}
751 
752 	return old_addr;
753 }
754 
755 static int gaudi_init_iatu(struct hl_device *hdev)
756 {
757 	struct hl_inbound_pci_region inbound_region;
758 	struct hl_outbound_pci_region outbound_region;
759 	int rc;
760 
761 	if (hdev->asic_prop.iatu_done_by_fw)
762 		return 0;
763 
764 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 	inbound_region.mode = PCI_BAR_MATCH_MODE;
766 	inbound_region.bar = SRAM_BAR_ID;
767 	inbound_region.addr = SRAM_BASE_ADDR;
768 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769 	if (rc)
770 		goto done;
771 
772 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 	inbound_region.mode = PCI_BAR_MATCH_MODE;
774 	inbound_region.bar = CFG_BAR_ID;
775 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
776 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777 	if (rc)
778 		goto done;
779 
780 	/* Inbound Region 2 - Bar 4 - Point to HBM */
781 	inbound_region.mode = PCI_BAR_MATCH_MODE;
782 	inbound_region.bar = HBM_BAR_ID;
783 	inbound_region.addr = DRAM_PHYS_BASE;
784 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785 	if (rc)
786 		goto done;
787 
788 	/* Outbound Region 0 - Point to Host */
789 	outbound_region.addr = HOST_PHYS_BASE;
790 	outbound_region.size = HOST_PHYS_SIZE;
791 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792 
793 done:
794 	return rc;
795 }
796 
797 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
798 {
799 	return RREG32(mmHW_STATE);
800 }
801 
802 static int gaudi_early_init(struct hl_device *hdev)
803 {
804 	struct asic_fixed_properties *prop = &hdev->asic_prop;
805 	struct pci_dev *pdev = hdev->pdev;
806 	resource_size_t pci_bar_size;
807 	u32 fw_boot_status;
808 	int rc;
809 
810 	rc = gaudi_set_fixed_properties(hdev);
811 	if (rc) {
812 		dev_err(hdev->dev, "Failed setting fixed properties\n");
813 		return rc;
814 	}
815 
816 	/* Check BAR sizes */
817 	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
818 
819 	if (pci_bar_size != SRAM_BAR_SIZE) {
820 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
821 			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
822 		rc = -ENODEV;
823 		goto free_queue_props;
824 	}
825 
826 	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
827 
828 	if (pci_bar_size != CFG_BAR_SIZE) {
829 		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
830 			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
831 		rc = -ENODEV;
832 		goto free_queue_props;
833 	}
834 
835 	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
836 	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
837 
838 	/* If FW security is enabled at this point it means no access to ELBI */
839 	if (hdev->asic_prop.fw_security_enabled) {
840 		hdev->asic_prop.iatu_done_by_fw = true;
841 
842 		/*
843 		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
844 		 * decision can only be taken based on PCI ID security.
845 		 */
846 		hdev->asic_prop.gic_interrupts_enable = false;
847 		goto pci_init;
848 	}
849 
850 	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
851 				&fw_boot_status);
852 	if (rc)
853 		goto free_queue_props;
854 
855 	/* Check whether FW is configuring iATU */
856 	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
857 			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
858 		hdev->asic_prop.iatu_done_by_fw = true;
859 
860 pci_init:
861 	rc = hl_pci_init(hdev);
862 	if (rc)
863 		goto free_queue_props;
864 
865 	/* Before continuing in the initialization, we need to read the preboot
866 	 * version to determine whether we run with a security-enabled firmware
867 	 */
868 	rc = hl_fw_read_preboot_status(hdev);
869 	if (rc) {
870 		if (hdev->reset_on_preboot_fail)
871 			/* we are already on failure flow, so don't check if hw_fini fails. */
872 			hdev->asic_funcs->hw_fini(hdev, true, false);
873 		goto pci_fini;
874 	}
875 
876 	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
877 		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
878 		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
879 		if (rc) {
880 			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
881 			goto pci_fini;
882 		}
883 	}
884 
885 	return 0;
886 
887 pci_fini:
888 	hl_pci_fini(hdev);
889 free_queue_props:
890 	kfree(hdev->asic_prop.hw_queues_props);
891 	return rc;
892 }
893 
894 static int gaudi_early_fini(struct hl_device *hdev)
895 {
896 	kfree(hdev->asic_prop.hw_queues_props);
897 	hl_pci_fini(hdev);
898 
899 	return 0;
900 }
901 
902 /**
903  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904  *
905  * @hdev: pointer to hl_device structure
906  *
907  */
908 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909 {
910 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911 	struct asic_fixed_properties *prop = &hdev->asic_prop;
912 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913 	int rc;
914 
915 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
916 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917 		struct gaudi_device *gaudi = hdev->asic_specific;
918 
919 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920 			return 0;
921 
922 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923 
924 		if (rc)
925 			return rc;
926 
927 		freq = pll_freq_arr[2];
928 	} else {
929 		/* Backward compatibility */
930 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932 		nr = RREG32(mmPSOC_CPU_PLL_NR);
933 		nf = RREG32(mmPSOC_CPU_PLL_NF);
934 		od = RREG32(mmPSOC_CPU_PLL_OD);
935 
936 		if (div_sel == DIV_SEL_REF_CLK ||
937 				div_sel == DIV_SEL_DIVIDED_REF) {
938 			if (div_sel == DIV_SEL_REF_CLK)
939 				freq = PLL_REF_CLK;
940 			else
941 				freq = PLL_REF_CLK / (div_fctr + 1);
942 		} else if (div_sel == DIV_SEL_PLL_CLK ||
943 			div_sel == DIV_SEL_DIVIDED_PLL) {
944 			pll_clk = PLL_REF_CLK * (nf + 1) /
945 					((nr + 1) * (od + 1));
946 			if (div_sel == DIV_SEL_PLL_CLK)
947 				freq = pll_clk;
948 			else
949 				freq = pll_clk / (div_fctr + 1);
950 		} else {
951 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952 			freq = 0;
953 		}
954 	}
955 
956 	prop->psoc_timestamp_frequency = freq;
957 	prop->psoc_pci_pll_nr = nr;
958 	prop->psoc_pci_pll_nf = nf;
959 	prop->psoc_pci_pll_od = od;
960 	prop->psoc_pci_pll_div_factor = div_fctr;
961 
962 	return 0;
963 }
964 
965 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
966 		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
967 {
968 	struct asic_fixed_properties *prop = &hdev->asic_prop;
969 	struct packet_lin_dma *init_tpc_mem_pkt;
970 	struct hl_cs_job *job;
971 	struct hl_cb *cb;
972 	u64 dst_addr;
973 	u32 cb_size, ctl;
974 	u8 tpc_id;
975 	int rc;
976 
977 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
978 	if (!cb)
979 		return -EFAULT;
980 
981 	init_tpc_mem_pkt = cb->kernel_address;
982 	cb_size = sizeof(*init_tpc_mem_pkt);
983 	memset(init_tpc_mem_pkt, 0, cb_size);
984 
985 	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
986 
987 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
988 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
989 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
990 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
991 
992 	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
993 
994 	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
995 
996 	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
997 	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
998 				round_up(prop->sram_user_base_address, SZ_8K));
999 	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1000 
1001 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1002 	if (!job) {
1003 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1004 		rc = -ENOMEM;
1005 		goto release_cb;
1006 	}
1007 
1008 	job->id = 0;
1009 	job->user_cb = cb;
1010 	atomic_inc(&job->user_cb->cs_cnt);
1011 	job->user_cb_size = cb_size;
1012 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1013 	job->patched_cb = job->user_cb;
1014 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1015 
1016 	hl_debugfs_add_job(hdev, job);
1017 
1018 	rc = gaudi_send_job_on_qman0(hdev, job);
1019 
1020 	if (rc)
1021 		goto free_job;
1022 
1023 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1024 		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1025 		if (rc)
1026 			break;
1027 	}
1028 
1029 free_job:
1030 	hl_userptr_delete_list(hdev, &job->userptr_list);
1031 	hl_debugfs_remove_job(hdev, job);
1032 	kfree(job);
1033 	atomic_dec(&cb->cs_cnt);
1034 
1035 release_cb:
1036 	hl_cb_put(cb);
1037 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1038 
1039 	return rc;
1040 }
1041 
1042 /*
1043  * gaudi_init_tpc_mem() - Initialize TPC memories.
1044  * @hdev: Pointer to hl_device structure.
1045  *
1046  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047  *
1048  * Return: 0 for success, negative value for error.
1049  */
1050 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051 {
1052 	const struct firmware *fw;
1053 	size_t fw_size;
1054 	void *cpu_addr;
1055 	dma_addr_t dma_handle;
1056 	int rc, count = 5;
1057 
1058 again:
1059 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060 	if (rc == -EINTR && count-- > 0) {
1061 		msleep(50);
1062 		goto again;
1063 	}
1064 
1065 	if (rc) {
1066 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067 				GAUDI_TPC_FW_FILE);
1068 		goto out;
1069 	}
1070 
1071 	fw_size = fw->size;
1072 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073 	if (!cpu_addr) {
1074 		dev_err(hdev->dev,
1075 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1076 			fw_size);
1077 		rc = -ENOMEM;
1078 		goto out;
1079 	}
1080 
1081 	memcpy(cpu_addr, fw->data, fw_size);
1082 
1083 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084 
1085 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086 
1087 out:
1088 	release_firmware(fw);
1089 	return rc;
1090 }
1091 
1092 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093 {
1094 	struct gaudi_device *gaudi = hdev->asic_specific;
1095 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096 	struct hl_hw_queue *q;
1097 	u32 i, sob_id, sob_group_id, queue_id;
1098 
1099 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1100 	sob_group_id =
1101 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103 
1104 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1107 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1108 	}
1109 
1110 	/* Both DMA5 and TPC7 use the same resources since only a single
1111 	 * engine need to participate in the reduction process
1112 	 */
1113 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114 	q = &hdev->kernel_queues[queue_id];
1115 	q->sync_stream_prop.collective_sob_id =
1116 			sob_id + NIC_NUMBER_OF_ENGINES;
1117 
1118 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119 	q = &hdev->kernel_queues[queue_id];
1120 	q->sync_stream_prop.collective_sob_id =
1121 			sob_id + NIC_NUMBER_OF_ENGINES;
1122 }
1123 
1124 static void gaudi_sob_group_hw_reset(struct kref *ref)
1125 {
1126 	struct gaudi_hw_sob_group *hw_sob_group =
1127 		container_of(ref, struct gaudi_hw_sob_group, kref);
1128 	struct hl_device *hdev = hw_sob_group->hdev;
1129 	int i;
1130 
1131 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134 
1135 	kref_init(&hw_sob_group->kref);
1136 }
1137 
1138 static void gaudi_sob_group_reset_error(struct kref *ref)
1139 {
1140 	struct gaudi_hw_sob_group *hw_sob_group =
1141 		container_of(ref, struct gaudi_hw_sob_group, kref);
1142 	struct hl_device *hdev = hw_sob_group->hdev;
1143 
1144 	dev_crit(hdev->dev,
1145 		"SOB release shouldn't be called here, base_sob_id: %d\n",
1146 		hw_sob_group->base_sob_id);
1147 }
1148 
1149 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1150 {
1151 	struct gaudi_collective_properties *prop;
1152 	int i;
1153 
1154 	prop = &gaudi->collective_props;
1155 
1156 	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1157 
1158 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1159 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1160 			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161 					BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162 	/* Set collective engine bit */
1163 	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1164 				BIT(i % HL_MAX_SOBS_PER_MONITOR);
1165 }
1166 
1167 static int gaudi_collective_init(struct hl_device *hdev)
1168 {
1169 	u32 i, sob_id, reserved_sobs_per_group;
1170 	struct gaudi_collective_properties *prop;
1171 	struct gaudi_device *gaudi;
1172 
1173 	gaudi = hdev->asic_specific;
1174 	prop = &gaudi->collective_props;
1175 	sob_id = hdev->asic_prop.collective_first_sob;
1176 
1177 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 	reserved_sobs_per_group =
1179 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180 
1181 	/* Init SOB groups */
1182 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183 		prop->hw_sob_group[i].hdev = hdev;
1184 		prop->hw_sob_group[i].base_sob_id = sob_id;
1185 		sob_id += reserved_sobs_per_group;
1186 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187 	}
1188 
1189 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1190 		prop->next_sob_group_val[i] = 1;
1191 		prop->curr_sob_group_idx[i] = 0;
1192 		gaudi_collective_map_sobs(hdev, i);
1193 	}
1194 
1195 	gaudi_collective_mstr_sob_mask_set(gaudi);
1196 
1197 	return 0;
1198 }
1199 
1200 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1201 {
1202 	struct gaudi_device *gaudi = hdev->asic_specific;
1203 	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1204 
1205 	kref_put(&cprop->hw_sob_group[sob_group].kref,
1206 					gaudi_sob_group_hw_reset);
1207 }
1208 
1209 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1210 		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1211 {
1212 	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1213 	struct gaudi_collective_properties *cprop;
1214 	struct hl_gen_wait_properties wait_prop;
1215 	struct hl_sync_stream_properties *prop;
1216 	struct gaudi_device *gaudi;
1217 
1218 	gaudi = hdev->asic_specific;
1219 	cprop = &gaudi->collective_props;
1220 	queue_id = job->hw_queue_id;
1221 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1222 
1223 	master_sob_base =
1224 		cprop->hw_sob_group[sob_group_offset].base_sob_id;
1225 	master_monitor = prop->collective_mstr_mon_id[0];
1226 
1227 	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1228 
1229 	dev_dbg(hdev->dev,
1230 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1231 		master_sob_base, cprop->mstr_sob_mask[0],
1232 		cprop->next_sob_group_val[stream],
1233 		master_monitor, queue_id);
1234 
1235 	wait_prop.data = (void *) job->patched_cb;
1236 	wait_prop.sob_base = master_sob_base;
1237 	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1238 	wait_prop.sob_val = cprop->next_sob_group_val[stream];
1239 	wait_prop.mon_id = master_monitor;
1240 	wait_prop.q_idx = queue_id;
1241 	wait_prop.size = cb_size;
1242 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1243 
1244 	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1245 	master_monitor = prop->collective_mstr_mon_id[1];
1246 
1247 	dev_dbg(hdev->dev,
1248 		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1249 		master_sob_base, cprop->mstr_sob_mask[1],
1250 		cprop->next_sob_group_val[stream],
1251 		master_monitor, queue_id);
1252 
1253 	wait_prop.sob_base = master_sob_base;
1254 	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1255 	wait_prop.mon_id = master_monitor;
1256 	wait_prop.size = cb_size;
1257 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1258 }
1259 
1260 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1261 		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1262 {
1263 	struct hl_gen_wait_properties wait_prop;
1264 	struct hl_sync_stream_properties *prop;
1265 	u32 queue_id, cb_size = 0;
1266 
1267 	queue_id = job->hw_queue_id;
1268 	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1269 
1270 	if (job->cs->encaps_signals) {
1271 		/* use the encaps signal handle store earlier in the flow
1272 		 * and set the SOB information from the encaps
1273 		 * signals handle
1274 		 */
1275 		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1276 						cs_cmpl);
1277 
1278 		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
1279 				job->cs->sequence,
1280 				cs_cmpl->hw_sob->sob_id,
1281 				cs_cmpl->sob_val);
1282 	}
1283 
1284 	/* Add to wait CBs using slave monitor */
1285 	wait_prop.data = (void *) job->user_cb;
1286 	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1287 	wait_prop.sob_mask = 0x1;
1288 	wait_prop.sob_val = cs_cmpl->sob_val;
1289 	wait_prop.mon_id = prop->collective_slave_mon_id;
1290 	wait_prop.q_idx = queue_id;
1291 	wait_prop.size = cb_size;
1292 
1293 	dev_dbg(hdev->dev,
1294 		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1295 		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1296 		prop->collective_slave_mon_id, queue_id);
1297 
1298 	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1299 
1300 	dev_dbg(hdev->dev,
1301 		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1302 		prop->collective_sob_id, queue_id);
1303 
1304 	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1305 			prop->collective_sob_id, cb_size, false);
1306 }
1307 
/*
 * gaudi_collective_wait_init_cs() - Prepare all the jobs of a collective wait CS.
 * @cs: the collective wait command submission
 *
 * Takes the SOB to wait on either from the encapsulated signals handle or
 * from the signal CS, takes a reference on it (unless the signal CS already
 * completed), generates the CBs of the master and slave jobs and advances the
 * per-stream SOB group value, switching to the next SOB group on wraparound.
 * Finally the reference on the signal fence is dropped.
 *
 * Return: 0 on success, -EINVAL if the signal CS has already completed.
 */
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depends on each
		 * job wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	/* Generate the CBs of every job, according to the mode of its queue */
	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	/* Remember which SOB group this CS uses */
	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		/* Re-map the slave queues of this stream to the new SOB group */
		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Full memory barrier before dropping the signal fence reference */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
1414 
1415 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416 {
1417 	u32 cacheline_end, additional_commands;
1418 
1419 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1421 
1422 	if (user_cb_size + additional_commands > cacheline_end)
1423 		return cacheline_end - user_cb_size + additional_commands;
1424 	else
1425 		return additional_commands;
1426 }
1427 
1428 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1429 		struct hl_ctx *ctx, struct hl_cs *cs,
1430 		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1431 		u32 encaps_signal_offset)
1432 {
1433 	struct hw_queue_properties *hw_queue_prop;
1434 	struct hl_cs_counters_atomic *cntr;
1435 	struct hl_cs_job *job;
1436 	struct hl_cb *cb;
1437 	u32 cb_size;
1438 	bool patched_cb;
1439 
1440 	cntr = &hdev->aggregated_cs_counters;
1441 
1442 	if (mode == HL_COLLECTIVE_MASTER) {
1443 		/* CB size of collective master queue contains
1444 		 * 4 msg short packets for monitor 1 configuration
1445 		 * 1 fence packet
1446 		 * 4 msg short packets for monitor 2 configuration
1447 		 * 1 fence packet
1448 		 * 2 msg prot packets for completion and MSI
1449 		 */
1450 		cb_size = sizeof(struct packet_msg_short) * 8 +
1451 				sizeof(struct packet_fence) * 2 +
1452 				sizeof(struct packet_msg_prot) * 2;
1453 		patched_cb = true;
1454 	} else {
1455 		/* CB size of collective slave queues contains
1456 		 * 4 msg short packets for monitor configuration
1457 		 * 1 fence packet
1458 		 * 1 additional msg short packet for sob signal
1459 		 */
1460 		cb_size = sizeof(struct packet_msg_short) * 5 +
1461 				sizeof(struct packet_fence);
1462 		patched_cb = false;
1463 	}
1464 
1465 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1466 	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1467 	if (!job) {
1468 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1469 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1470 		dev_err(hdev->dev, "Failed to allocate a new job\n");
1471 		return -ENOMEM;
1472 	}
1473 
1474 	/* Allocate internal mapped CB for non patched CBs */
1475 	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1476 	if (!cb) {
1477 		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1478 		atomic64_inc(&cntr->out_of_mem_drop_cnt);
1479 		kfree(job);
1480 		return -EFAULT;
1481 	}
1482 
1483 	job->id = 0;
1484 	job->cs = cs;
1485 	job->user_cb = cb;
1486 	atomic_inc(&job->user_cb->cs_cnt);
1487 	job->user_cb_size = cb_size;
1488 	job->hw_queue_id = queue_id;
1489 
1490 	/* since its guaranteed to have only one chunk in the collective wait
1491 	 * cs, we can use this chunk to set the encapsulated signal offset
1492 	 * in the jobs.
1493 	 */
1494 	if (cs->encaps_signals)
1495 		job->encaps_sig_wait_offset = encaps_signal_offset;
1496 
1497 	/*
1498 	 * No need in parsing, user CB is the patched CB.
1499 	 * We call hl_cb_destroy() out of two reasons - we don't need
1500 	 * the CB in the CB idr anymore and to decrement its refcount as
1501 	 * it was incremented inside hl_cb_kernel_create().
1502 	 */
1503 	if (patched_cb)
1504 		job->patched_cb = job->user_cb;
1505 	else
1506 		job->patched_cb = NULL;
1507 
1508 	job->job_cb_size = job->user_cb_size;
1509 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1510 
1511 	/* increment refcount as for external queues we get completion */
1512 	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1513 		cs_get(cs);
1514 
1515 	cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1516 
1517 	list_add_tail(&job->cs_node, &cs->job_list);
1518 
1519 	hl_debugfs_add_job(hdev, job);
1520 
1521 	return 0;
1522 }
1523 
1524 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1525 		struct hl_ctx *ctx, struct hl_cs *cs,
1526 		u32 wait_queue_id, u32 collective_engine_id,
1527 		u32 encaps_signal_offset)
1528 {
1529 	struct gaudi_device *gaudi = hdev->asic_specific;
1530 	struct hw_queue_properties *hw_queue_prop;
1531 	u32 queue_id, collective_queue, num_jobs;
1532 	u32 stream, nic_queue, nic_idx = 0;
1533 	bool skip;
1534 	int i, rc = 0;
1535 
1536 	/* Verify wait queue id is configured as master */
1537 	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1538 	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1539 		dev_err(hdev->dev,
1540 			"Queue %d is not configured as collective master\n",
1541 			wait_queue_id);
1542 		return -EINVAL;
1543 	}
1544 
1545 	/* Verify engine id is supported */
1546 	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1547 			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1548 		dev_err(hdev->dev,
1549 			"Collective wait does not support engine %u\n",
1550 			collective_engine_id);
1551 		return -EINVAL;
1552 	}
1553 
1554 	stream = wait_queue_id % 4;
1555 
1556 	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1557 		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1558 	else
1559 		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1560 
1561 	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1562 	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1563 
1564 	/* First job goes to the collective master queue, it will wait for
1565 	 * the collective slave queues to finish execution.
1566 	 * The synchronization is done using two monitors:
1567 	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1568 	 * reduction engine (DMA5/TPC7).
1569 	 *
1570 	 * Rest of the jobs goes to the collective slave queues which will
1571 	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1572 	 */
1573 	for (i = 0 ; i < num_jobs ; i++) {
1574 		if (i == 0) {
1575 			queue_id = wait_queue_id;
1576 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1577 				HL_COLLECTIVE_MASTER, queue_id,
1578 				wait_queue_id, encaps_signal_offset);
1579 		} else {
1580 			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1581 				if (gaudi->hw_cap_initialized &
1582 					BIT(HW_CAP_NIC_SHIFT + nic_idx))
1583 					skip = false;
1584 				else
1585 					skip = true;
1586 
1587 				queue_id = nic_queue;
1588 				nic_queue += 4;
1589 				nic_idx++;
1590 
1591 				if (skip)
1592 					continue;
1593 			} else {
1594 				queue_id = collective_queue;
1595 			}
1596 
1597 			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1598 				HL_COLLECTIVE_SLAVE, queue_id,
1599 				wait_queue_id, encaps_signal_offset);
1600 		}
1601 
1602 		if (rc)
1603 			return rc;
1604 	}
1605 
1606 	return rc;
1607 }
1608 
1609 static int gaudi_late_init(struct hl_device *hdev)
1610 {
1611 	struct gaudi_device *gaudi = hdev->asic_specific;
1612 	int rc;
1613 
1614 	rc = gaudi->cpucp_info_get(hdev);
1615 	if (rc) {
1616 		dev_err(hdev->dev, "Failed to get cpucp info\n");
1617 		return rc;
1618 	}
1619 
1620 	if ((hdev->card_type == cpucp_card_type_pci) &&
1621 			(hdev->nic_ports_mask & 0x3)) {
1622 		dev_info(hdev->dev,
1623 			"PCI card detected, only 8 ports are enabled\n");
1624 		hdev->nic_ports_mask &= ~0x3;
1625 
1626 		/* Stop and disable unused NIC QMANs */
1627 		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1628 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1629 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1630 
1631 		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1632 					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1633 					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1634 
1635 		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1636 		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1637 
1638 		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1639 	}
1640 
1641 	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1642 	if (rc)
1643 		return rc;
1644 
1645 	/* Scrub both SRAM and DRAM */
1646 	rc = hdev->asic_funcs->scrub_device_mem(hdev);
1647 	if (rc)
1648 		goto disable_pci_access;
1649 
1650 	rc = gaudi_fetch_psoc_frequency(hdev);
1651 	if (rc) {
1652 		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1653 		goto disable_pci_access;
1654 	}
1655 
1656 	rc = gaudi_mmu_clear_pgt_range(hdev);
1657 	if (rc) {
1658 		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1659 		goto disable_pci_access;
1660 	}
1661 
1662 	rc = gaudi_init_tpc_mem(hdev);
1663 	if (rc) {
1664 		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1665 		goto disable_pci_access;
1666 	}
1667 
1668 	rc = gaudi_collective_init(hdev);
1669 	if (rc) {
1670 		dev_err(hdev->dev, "Failed to init collective\n");
1671 		goto disable_pci_access;
1672 	}
1673 
1674 	/* We only support a single ASID for the user, so for the sake of optimization, just
1675 	 * initialize the ASID one time during device initialization with the fixed value of 1
1676 	 */
1677 	gaudi_mmu_prepare(hdev, 1);
1678 
1679 	hl_fw_set_pll_profile(hdev);
1680 
1681 	return 0;
1682 
1683 disable_pci_access:
1684 	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1685 
1686 	return rc;
1687 }
1688 
/*
 * gaudi_late_fini() - Late stage of the device teardown.
 * @hdev: pointer to hl_device structure
 *
 * Only releases the hwmon resources.
 */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1693 
1694 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1695 {
1696 	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1697 	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1698 	int i, j, rc = 0;
1699 
1700 	/*
1701 	 * The device CPU works with 40-bits addresses, while bit 39 must be set
1702 	 * to '1' when accessing the host.
1703 	 * Bits 49:39 of the full host address are saved for a later
1704 	 * configuration of the HW to perform extension to 50 bits.
1705 	 * Because there is a single HW register that holds the extension bits,
1706 	 * these bits must be identical in all allocated range.
1707 	 */
1708 
1709 	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1710 		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1711 								&dma_addr_arr[i],
1712 								GFP_KERNEL | __GFP_ZERO);
1713 		if (!virt_addr_arr[i]) {
1714 			rc = -ENOMEM;
1715 			goto free_dma_mem_arr;
1716 		}
1717 
1718 		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1719 		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1720 				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1721 			break;
1722 	}
1723 
1724 	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1725 		dev_err(hdev->dev,
1726 			"MSB of CPU accessible DMA memory are not identical in all range\n");
1727 		rc = -EFAULT;
1728 		goto free_dma_mem_arr;
1729 	}
1730 
1731 	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1732 	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1733 	hdev->cpu_pci_msb_addr =
1734 		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1735 
1736 	if (!hdev->asic_prop.fw_security_enabled)
1737 		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1738 
1739 free_dma_mem_arr:
1740 	for (j = 0 ; j < i ; j++)
1741 		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1742 						dma_addr_arr[j]);
1743 
1744 	return rc;
1745 }
1746 
1747 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1748 {
1749 	struct gaudi_device *gaudi = hdev->asic_specific;
1750 	struct gaudi_internal_qman_info *q;
1751 	u32 i;
1752 
1753 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1754 		q = &gaudi->internal_qmans[i];
1755 		if (!q->pq_kernel_addr)
1756 			continue;
1757 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1758 	}
1759 }
1760 
1761 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1762 {
1763 	struct gaudi_device *gaudi = hdev->asic_specific;
1764 	struct gaudi_internal_qman_info *q;
1765 	int rc, i;
1766 
1767 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1768 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1769 			continue;
1770 
1771 		q = &gaudi->internal_qmans[i];
1772 
1773 		switch (i) {
1774 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1775 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1776 			break;
1777 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1778 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1779 			break;
1780 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1781 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1782 			break;
1783 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1784 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1785 			break;
1786 		default:
1787 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1788 			rc = -EINVAL;
1789 			goto free_internal_qmans_pq_mem;
1790 		}
1791 
1792 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1793 								GFP_KERNEL | __GFP_ZERO);
1794 		if (!q->pq_kernel_addr) {
1795 			rc = -ENOMEM;
1796 			goto free_internal_qmans_pq_mem;
1797 		}
1798 	}
1799 
1800 	return 0;
1801 
1802 free_internal_qmans_pq_mem:
1803 	gaudi_free_internal_qmans_pq_mem(hdev);
1804 	return rc;
1805 }
1806 
1807 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1808 {
1809 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1810 	struct pci_mem_region *region;
1811 
1812 	/* CFG */
1813 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1814 	region->region_base = CFG_BASE;
1815 	region->region_size = CFG_SIZE;
1816 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1817 	region->bar_size = CFG_BAR_SIZE;
1818 	region->bar_id = CFG_BAR_ID;
1819 	region->used = 1;
1820 
1821 	/* SRAM */
1822 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1823 	region->region_base = SRAM_BASE_ADDR;
1824 	region->region_size = SRAM_SIZE;
1825 	region->offset_in_bar = 0;
1826 	region->bar_size = SRAM_BAR_SIZE;
1827 	region->bar_id = SRAM_BAR_ID;
1828 	region->used = 1;
1829 
1830 	/* DRAM */
1831 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1832 	region->region_base = DRAM_PHYS_BASE;
1833 	region->region_size = hdev->asic_prop.dram_size;
1834 	region->offset_in_bar = 0;
1835 	region->bar_size = prop->dram_pci_bar_size;
1836 	region->bar_id = HBM_BAR_ID;
1837 	region->used = 1;
1838 
1839 	/* SP SRAM */
1840 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1841 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1842 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1843 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1844 	region->bar_size = CFG_BAR_SIZE;
1845 	region->bar_id = CFG_BAR_ID;
1846 	region->used = 1;
1847 }
1848 
1849 static int gaudi_sw_init(struct hl_device *hdev)
1850 {
1851 	struct gaudi_device *gaudi;
1852 	u32 i, event_id = 0;
1853 	int rc;
1854 
1855 	/* Allocate device structure */
1856 	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1857 	if (!gaudi)
1858 		return -ENOMEM;
1859 
1860 	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1861 		if (gaudi_irq_map_table[i].valid) {
1862 			if (event_id == GAUDI_EVENT_SIZE) {
1863 				dev_err(hdev->dev,
1864 					"Event array exceeds the limit of %u events\n",
1865 					GAUDI_EVENT_SIZE);
1866 				rc = -EINVAL;
1867 				goto free_gaudi_device;
1868 			}
1869 
1870 			gaudi->events[event_id++] =
1871 					gaudi_irq_map_table[i].fc_id;
1872 		}
1873 	}
1874 
1875 	gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1876 
1877 	hdev->asic_specific = gaudi;
1878 
1879 	/* Create DMA pool for small allocations */
1880 	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1881 			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1882 	if (!hdev->dma_pool) {
1883 		dev_err(hdev->dev, "failed to create DMA pool\n");
1884 		rc = -ENOMEM;
1885 		goto free_gaudi_device;
1886 	}
1887 
1888 	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1889 	if (rc)
1890 		goto free_dma_pool;
1891 
1892 	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1893 	if (!hdev->cpu_accessible_dma_pool) {
1894 		dev_err(hdev->dev,
1895 			"Failed to create CPU accessible DMA pool\n");
1896 		rc = -ENOMEM;
1897 		goto free_cpu_dma_mem;
1898 	}
1899 
1900 	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1901 				(uintptr_t) hdev->cpu_accessible_dma_mem,
1902 				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1903 	if (rc) {
1904 		dev_err(hdev->dev,
1905 			"Failed to add memory to CPU accessible DMA pool\n");
1906 		rc = -EFAULT;
1907 		goto free_cpu_accessible_dma_pool;
1908 	}
1909 
1910 	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1911 	if (rc)
1912 		goto free_cpu_accessible_dma_pool;
1913 
1914 	spin_lock_init(&gaudi->hw_queues_lock);
1915 
1916 	hdev->supports_sync_stream = true;
1917 	hdev->supports_coresight = true;
1918 	hdev->supports_staged_submission = true;
1919 	hdev->supports_wait_for_multi_cs = true;
1920 
1921 	hdev->asic_funcs->set_pci_memory_regions(hdev);
1922 	hdev->stream_master_qid_arr =
1923 				hdev->asic_funcs->get_stream_master_qid_arr();
1924 	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1925 
1926 	return 0;
1927 
1928 free_cpu_accessible_dma_pool:
1929 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1930 free_cpu_dma_mem:
1931 	if (!hdev->asic_prop.fw_security_enabled)
1932 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1933 					hdev->cpu_pci_msb_addr);
1934 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1935 					hdev->cpu_accessible_dma_address);
1936 free_dma_pool:
1937 	dma_pool_destroy(hdev->dma_pool);
1938 free_gaudi_device:
1939 	kfree(gaudi);
1940 	return rc;
1941 }
1942 
1943 static int gaudi_sw_fini(struct hl_device *hdev)
1944 {
1945 	struct gaudi_device *gaudi = hdev->asic_specific;
1946 
1947 	gaudi_free_internal_qmans_pq_mem(hdev);
1948 
1949 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1950 
1951 	if (!hdev->asic_prop.fw_security_enabled)
1952 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1953 					hdev->cpu_pci_msb_addr);
1954 
1955 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1956 					hdev->cpu_accessible_dma_address);
1957 
1958 	dma_pool_destroy(hdev->dma_pool);
1959 
1960 	kfree(gaudi);
1961 
1962 	return 0;
1963 }
1964 
1965 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1966 {
1967 	struct hl_device *hdev = arg;
1968 	int i;
1969 
1970 	if (hdev->disabled)
1971 		return IRQ_HANDLED;
1972 
1973 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1974 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1975 
1976 	hl_irq_handler_eq(irq, &hdev->event_queue);
1977 
1978 	return IRQ_HANDLED;
1979 }
1980 
1981 /*
1982  * For backward compatibility, new MSI interrupts should be set after the
1983  * existing CPU and NIC interrupts.
1984  */
1985 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1986 				bool cpu_eq)
1987 {
1988 	int msi_vec;
1989 
1990 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1991 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1992 				GAUDI_EVENT_QUEUE_MSI_IDX);
1993 
1994 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1995 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1996 
1997 	return pci_irq_vector(hdev->pdev, msi_vec);
1998 }
1999 
2000 static int gaudi_enable_msi_single(struct hl_device *hdev)
2001 {
2002 	int rc, irq;
2003 
2004 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2005 
2006 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2007 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2008 			"gaudi single msi", hdev);
2009 	if (rc)
2010 		dev_err(hdev->dev,
2011 			"Failed to request single MSI IRQ\n");
2012 
2013 	return rc;
2014 }
2015 
2016 static int gaudi_enable_msi(struct hl_device *hdev)
2017 {
2018 	struct gaudi_device *gaudi = hdev->asic_specific;
2019 	int rc;
2020 
2021 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2022 		return 0;
2023 
2024 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2025 	if (rc < 0) {
2026 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2027 		return rc;
2028 	}
2029 
2030 	rc = gaudi_enable_msi_single(hdev);
2031 	if (rc)
2032 		goto free_pci_irq_vectors;
2033 
2034 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2035 
2036 	return 0;
2037 
2038 free_pci_irq_vectors:
2039 	pci_free_irq_vectors(hdev->pdev);
2040 	return rc;
2041 }
2042 
2043 static void gaudi_sync_irqs(struct hl_device *hdev)
2044 {
2045 	struct gaudi_device *gaudi = hdev->asic_specific;
2046 
2047 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2048 		return;
2049 
2050 	/* Wait for all pending IRQs to be finished */
2051 	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2052 }
2053 
2054 static void gaudi_disable_msi(struct hl_device *hdev)
2055 {
2056 	struct gaudi_device *gaudi = hdev->asic_specific;
2057 
2058 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2059 		return;
2060 
2061 	gaudi_sync_irqs(hdev);
2062 	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2063 	pci_free_irq_vectors(hdev->pdev);
2064 
2065 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2066 }
2067 
2068 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2069 {
2070 	struct gaudi_device *gaudi = hdev->asic_specific;
2071 
2072 	if (hdev->asic_prop.fw_security_enabled)
2073 		return;
2074 
2075 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2076 						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2077 		return;
2078 
2079 	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2080 		return;
2081 
2082 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2083 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2084 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2085 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2086 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2087 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2088 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2089 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2090 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2091 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2092 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2093 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2094 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2095 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2096 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2097 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2098 
2099 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2100 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2101 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2102 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2103 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2104 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2105 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2106 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2107 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2108 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2109 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2110 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2111 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2112 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2113 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2114 			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2115 
2116 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2117 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2118 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2119 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2120 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2121 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2122 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2123 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2124 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2125 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2126 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2127 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2128 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2129 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2130 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2131 			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2132 
2133 	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2134 }
2135 
2136 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2137 {
2138 	struct gaudi_device *gaudi = hdev->asic_specific;
2139 
2140 	if (hdev->asic_prop.fw_security_enabled)
2141 		return;
2142 
2143 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2144 					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2145 		return;
2146 
2147 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2148 		return;
2149 
2150 	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2151 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2152 	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2153 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2154 	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2155 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2156 	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2157 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2158 	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2159 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2160 	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2161 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2162 	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2163 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2164 	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2165 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2166 
2167 	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2168 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169 	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2170 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171 	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2172 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2173 	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2174 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2175 	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2176 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2177 	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2178 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2179 	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2180 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2181 	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2182 			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2183 
2184 	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2185 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2186 	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2187 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2188 	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2189 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2190 	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2191 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2192 	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2193 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2194 	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2195 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2196 	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2197 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2198 	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2199 			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2200 
2201 	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2202 }
2203 
2204 static void gaudi_init_e2e(struct hl_device *hdev)
2205 {
2206 	if (hdev->asic_prop.fw_security_enabled)
2207 		return;
2208 
2209 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2210 					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2211 		return;
2212 
2213 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2214 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2215 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2216 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2217 
2218 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2219 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2220 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2221 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2222 
2223 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2224 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2225 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2226 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2227 
2228 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2229 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2230 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2231 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2232 
2233 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2234 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2235 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2236 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2237 
2238 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2239 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2240 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2241 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2242 
2243 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2244 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2245 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2246 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2247 
2248 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2249 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2250 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2251 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2252 
2253 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2254 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2255 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2256 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2257 
2258 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2259 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2260 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2261 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2262 
2263 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2264 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2265 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2266 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2267 
2268 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2269 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2270 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2271 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2272 
2273 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2274 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2275 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2276 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2277 
2278 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2279 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2280 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2281 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2282 
2283 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2284 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2285 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2286 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2287 
2288 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2289 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2290 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2291 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2292 
2293 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2294 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2295 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2296 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2297 
2298 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2299 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2300 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2301 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2302 
2303 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2304 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2305 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2306 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2307 
2308 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2309 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2310 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2311 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2312 
2313 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2314 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2315 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2316 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2317 
2318 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2319 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2320 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2321 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2322 
2323 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2324 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2325 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2326 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2327 
2328 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2329 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2330 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2331 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2332 
2333 	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2334 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2335 	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2336 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2337 
2338 	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2339 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2340 	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2341 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2342 
2343 	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2344 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2345 	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2346 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2347 
2348 	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2349 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2350 	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2351 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2352 
2353 	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2354 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2355 	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2356 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2357 
2358 	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2359 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2360 	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2361 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2362 
2363 	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2364 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2365 	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2366 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2367 
2368 	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2369 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2370 	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2371 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2372 
2373 	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2374 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2375 	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2376 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2377 
2378 	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2379 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2380 	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2381 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2382 
2383 	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2384 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2385 	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2386 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2387 
2388 	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2389 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2390 	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2391 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2392 
2393 	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2394 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2395 	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2396 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2397 
2398 	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2399 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2400 	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2401 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2402 
2403 	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2404 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2405 	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2406 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2407 
2408 	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2409 			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2410 	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2411 			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2412 
2413 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2414 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2415 	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2416 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2417 
2418 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2419 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2420 	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2421 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2422 
2423 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2424 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2425 	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2426 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2427 
2428 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2429 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2430 	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2431 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2432 
2433 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2434 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2435 	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2436 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2437 
2438 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2439 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2440 	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2441 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2442 
2443 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2444 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2445 	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2446 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2447 
2448 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2449 			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2450 	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2451 			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2452 }
2453 
2454 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2455 {
2456 	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2457 
2458 	if (hdev->asic_prop.fw_security_enabled)
2459 		return;
2460 
2461 	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2462 						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2463 		return;
2464 
2465 	hbm0_wr = 0x33333333;
2466 	hbm0_rd = 0x77777777;
2467 	hbm1_wr = 0x55555555;
2468 	hbm1_rd = 0xDDDDDDDD;
2469 
2470 	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2471 	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2472 	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2473 	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2474 
2475 	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2476 	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2477 	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2478 	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2479 
2480 	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2481 	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2482 	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2483 	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2484 
2485 	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2486 	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2487 	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2488 	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2489 
2490 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2491 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2494 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2497 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2500 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2501 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2502 
2503 	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2504 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506 	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2507 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509 	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2510 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512 	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2513 			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2514 			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2515 }
2516 
2517 static void gaudi_init_golden_registers(struct hl_device *hdev)
2518 {
2519 	u32 tpc_offset;
2520 	int tpc_id, i;
2521 
2522 	gaudi_init_e2e(hdev);
2523 	gaudi_init_hbm_cred(hdev);
2524 
2525 	for (tpc_id = 0, tpc_offset = 0;
2526 				tpc_id < TPC_NUMBER_OF_ENGINES;
2527 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2528 		/* Mask all arithmetic interrupts from TPC */
2529 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2530 		/* Set 16 cache lines */
2531 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2532 				ICACHE_FETCH_LINE_NUM, 2);
2533 	}
2534 
2535 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2536 	for (i = 0 ; i < 128 ; i += 8)
2537 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2538 
2539 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543 }
2544 
2545 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2546 					int qman_id, dma_addr_t qman_pq_addr)
2547 {
2548 	struct cpu_dyn_regs *dyn_regs =
2549 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2550 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2551 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2552 	u32 q_off, dma_qm_offset;
2553 	u32 dma_qm_err_cfg, irq_handler_offset;
2554 
2555 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2556 
2557 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2558 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2559 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2560 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2561 	so_base_en_lo = lower_32_bits(CFG_BASE +
2562 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2563 	so_base_en_hi = upper_32_bits(CFG_BASE +
2564 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2565 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2566 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2567 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2568 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2569 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2570 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2571 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2572 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2573 
2574 	q_off = dma_qm_offset + qman_id * 4;
2575 
2576 	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2577 	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2578 
2579 	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2580 	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2581 	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2582 
2583 	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2584 	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2585 							QMAN_LDMA_SRC_OFFSET);
2586 	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2587 							QMAN_LDMA_DST_OFFSET);
2588 
2589 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2590 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2591 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2592 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2593 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2594 	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2595 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2596 	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2597 
2598 	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2599 
2600 	/* The following configuration is needed only once per QMAN */
2601 	if (qman_id == 0) {
2602 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2603 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2604 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2605 
2606 		/* Configure RAZWI IRQ */
2607 		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2608 		if (hdev->stop_on_err)
2609 			dma_qm_err_cfg |=
2610 				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2611 
2612 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2613 
2614 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2615 			lower_32_bits(CFG_BASE + irq_handler_offset));
2616 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2617 			upper_32_bits(CFG_BASE + irq_handler_offset));
2618 
2619 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2620 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2621 									dma_id);
2622 
2623 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2624 				QM_ARB_ERR_MSG_EN_MASK);
2625 
2626 		/* Set timeout to maximum */
2627 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2628 
2629 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2630 				QMAN_EXTERNAL_MAKE_TRUSTED);
2631 
2632 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2633 	}
2634 }
2635 
2636 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2637 {
2638 	struct cpu_dyn_regs *dyn_regs =
2639 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2640 	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2641 	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2642 	u32 irq_handler_offset;
2643 
2644 	/* Set to maximum possible according to physical size */
2645 	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2646 	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2647 
2648 	/* WA for H/W bug H3-2116 */
2649 	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2650 
2651 	/* STOP_ON bit implies no completion to operation in case of RAZWI */
2652 	if (hdev->stop_on_err)
2653 		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2654 
2655 	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2656 
2657 	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2658 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2659 			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2660 
2661 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2662 		lower_32_bits(CFG_BASE + irq_handler_offset));
2663 	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2664 		upper_32_bits(CFG_BASE + irq_handler_offset));
2665 
2666 	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2667 		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2668 	WREG32(mmDMA0_CORE_PROT + dma_offset,
2669 			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2670 	/* If the channel is secured, it should be in MMU bypass mode */
2671 	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2672 			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2673 	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2674 }
2675 
2676 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677 				u32 enable_mask)
2678 {
2679 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2680 
2681 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2682 }
2683 
2684 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2685 {
2686 	struct gaudi_device *gaudi = hdev->asic_specific;
2687 	struct hl_hw_queue *q;
2688 	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2689 
2690 	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2691 		return;
2692 
2693 	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2694 		dma_id = gaudi_dma_assignment[i];
2695 		/*
2696 		 * For queues after the CPU Q need to add 1 to get the correct
2697 		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2698 		 * order to get the correct MSI register.
2699 		 */
2700 		if (dma_id > 1) {
2701 			cpu_skip = 1;
2702 			nic_skip = NIC_NUMBER_OF_ENGINES;
2703 		} else {
2704 			cpu_skip = 0;
2705 			nic_skip = 0;
2706 		}
2707 
2708 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2709 			q_idx = 4 * dma_id + j + cpu_skip;
2710 			q = &hdev->kernel_queues[q_idx];
2711 			q->cq_id = cq_id++;
2712 			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2713 			gaudi_init_pci_dma_qman(hdev, dma_id, j,
2714 						q->bus_address);
2715 		}
2716 
2717 		gaudi_init_dma_core(hdev, dma_id);
2718 
2719 		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2720 	}
2721 
2722 	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2723 }
2724 
2725 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2726 					int qman_id, u64 qman_base_addr)
2727 {
2728 	struct cpu_dyn_regs *dyn_regs =
2729 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2730 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2731 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2732 	u32 dma_qm_err_cfg, irq_handler_offset;
2733 	u32 q_off, dma_qm_offset;
2734 
2735 	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2736 
2737 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
2738 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
2740 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741 	so_base_en_lo = lower_32_bits(CFG_BASE +
2742 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743 	so_base_en_hi = upper_32_bits(CFG_BASE +
2744 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2746 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2748 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749 	so_base_ws_lo = lower_32_bits(CFG_BASE +
2750 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751 	so_base_ws_hi = upper_32_bits(CFG_BASE +
2752 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753 
2754 	q_off = dma_qm_offset + qman_id * 4;
2755 
2756 	if (qman_id < 4) {
2757 		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2758 					lower_32_bits(qman_base_addr));
2759 		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2760 					upper_32_bits(qman_base_addr));
2761 
2762 		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2763 		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2764 		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2765 
2766 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2767 							QMAN_CPDMA_SIZE_OFFSET);
2768 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2769 							QMAN_CPDMA_SRC_OFFSET);
2770 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2771 							QMAN_CPDMA_DST_OFFSET);
2772 	} else {
2773 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2774 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2775 				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2776 
2777 		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2778 							QMAN_LDMA_SIZE_OFFSET);
2779 		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2780 							QMAN_LDMA_SRC_OFFSET);
2781 		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2782 							QMAN_LDMA_DST_OFFSET);
2783 
2784 		/* Configure RAZWI IRQ */
2785 		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2786 		if (hdev->stop_on_err)
2787 			dma_qm_err_cfg |=
2788 				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2789 
2790 		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2791 
2792 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2793 			lower_32_bits(CFG_BASE + irq_handler_offset));
2794 		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2795 			upper_32_bits(CFG_BASE + irq_handler_offset));
2796 
2797 		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2798 			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2799 									dma_id);
2800 
2801 		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2802 				QM_ARB_ERR_MSG_EN_MASK);
2803 
2804 		/* Set timeout to maximum */
2805 		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2806 
2807 		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2808 		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2809 				QMAN_INTERNAL_MAKE_TRUSTED);
2810 	}
2811 
2812 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2813 	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2814 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2815 	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2816 
2817 	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2818 	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2819 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2820 				mtr_base_ws_lo);
2821 		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2822 				mtr_base_ws_hi);
2823 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2824 				so_base_ws_lo);
2825 		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2826 				so_base_ws_hi);
2827 	}
2828 }
2829 
2830 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2831 {
2832 	struct gaudi_device *gaudi = hdev->asic_specific;
2833 	struct gaudi_internal_qman_info *q;
2834 	u64 qman_base_addr;
2835 	int i, j, dma_id, internal_q_index;
2836 
2837 	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2838 		return;
2839 
2840 	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2841 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2842 
2843 		for (j = 0 ; j < QMAN_STREAMS ; j++) {
2844 			 /*
2845 			  * Add the CPU queue in order to get the correct queue
2846 			  * number as all internal queue are placed after it
2847 			  */
2848 			internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2849 
2850 			q = &gaudi->internal_qmans[internal_q_index];
2851 			qman_base_addr = (u64) q->pq_dma_addr;
2852 			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2853 						qman_base_addr);
2854 		}
2855 
2856 		/* Initializing lower CP for HBM DMA QMAN */
2857 		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2858 
2859 		gaudi_init_dma_core(hdev, dma_id);
2860 
2861 		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2862 	}
2863 
2864 	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2865 }
2866 
2867 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2868 					int qman_id, u64 qman_base_addr)
2869 {
2870 	struct cpu_dyn_regs *dyn_regs =
2871 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2872 	u32 mtr_base_lo, mtr_base_hi;
2873 	u32 so_base_lo, so_base_hi;
2874 	u32 irq_handler_offset;
2875 	u32 q_off, mme_id;
2876 	u32 mme_qm_err_cfg;
2877 
2878 	mtr_base_lo = lower_32_bits(CFG_BASE +
2879 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2880 	mtr_base_hi = upper_32_bits(CFG_BASE +
2881 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2882 	so_base_lo = lower_32_bits(CFG_BASE +
2883 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2884 	so_base_hi = upper_32_bits(CFG_BASE +
2885 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2886 
2887 	q_off = mme_offset + qman_id * 4;
2888 
2889 	if (qman_id < 4) {
2890 		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2891 					lower_32_bits(qman_base_addr));
2892 		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2893 					upper_32_bits(qman_base_addr));
2894 
2895 		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2896 		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2897 		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2898 
2899 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2900 							QMAN_CPDMA_SIZE_OFFSET);
2901 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2902 							QMAN_CPDMA_SRC_OFFSET);
2903 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2904 							QMAN_CPDMA_DST_OFFSET);
2905 	} else {
2906 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2907 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2908 				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2909 
2910 		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2911 							QMAN_LDMA_SIZE_OFFSET);
2912 		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2913 							QMAN_LDMA_SRC_OFFSET);
2914 		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2915 							QMAN_LDMA_DST_OFFSET);
2916 
2917 		/* Configure RAZWI IRQ */
2918 		mme_id = mme_offset /
2919 				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2920 
2921 		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2922 		if (hdev->stop_on_err)
2923 			mme_qm_err_cfg |=
2924 				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2925 
2926 		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2927 
2928 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2929 			lower_32_bits(CFG_BASE + irq_handler_offset));
2930 		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2931 			upper_32_bits(CFG_BASE + irq_handler_offset));
2932 
2933 		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2934 			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2935 									mme_id);
2936 
2937 		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2938 				QM_ARB_ERR_MSG_EN_MASK);
2939 
2940 		/* Set timeout to maximum */
2941 		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2942 
2943 		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944 		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945 				QMAN_INTERNAL_MAKE_TRUSTED);
2946 	}
2947 
2948 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949 	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951 	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952 }
2953 
2954 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955 {
2956 	struct gaudi_device *gaudi = hdev->asic_specific;
2957 	struct gaudi_internal_qman_info *q;
2958 	u64 qman_base_addr;
2959 	u32 mme_offset;
2960 	int i, internal_q_index;
2961 
2962 	if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963 		return;
2964 
2965 	/*
2966 	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967 	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968 	 */
2969 
2970 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971 
2972 	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973 		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974 		q = &gaudi->internal_qmans[internal_q_index];
2975 		qman_base_addr = (u64) q->pq_dma_addr;
2976 		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977 					qman_base_addr);
2978 		if (i == 3)
2979 			mme_offset = 0;
2980 	}
2981 
2982 	/* Initializing lower CP for MME QMANs */
2983 	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984 	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985 	gaudi_init_mme_qman(hdev, 0, 4, 0);
2986 
2987 	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988 	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989 
2990 	gaudi->hw_cap_initialized |= HW_CAP_MME;
2991 }
2992 
2993 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994 				int qman_id, u64 qman_base_addr)
2995 {
2996 	struct cpu_dyn_regs *dyn_regs =
2997 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000 	u32 tpc_qm_err_cfg, irq_handler_offset;
3001 	u32 q_off, tpc_id;
3002 
3003 	mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007 	so_base_en_lo = lower_32_bits(CFG_BASE +
3008 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009 	so_base_en_hi = upper_32_bits(CFG_BASE +
3010 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011 	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015 	so_base_ws_lo = lower_32_bits(CFG_BASE +
3016 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3018 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019 
3020 	q_off = tpc_offset + qman_id * 4;
3021 
3022 	tpc_id = tpc_offset /
3023 			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024 
3025 	if (qman_id < 4) {
3026 		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027 					lower_32_bits(qman_base_addr));
3028 		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029 					upper_32_bits(qman_base_addr));
3030 
3031 		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032 		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033 		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034 
3035 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036 							QMAN_CPDMA_SIZE_OFFSET);
3037 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038 							QMAN_CPDMA_SRC_OFFSET);
3039 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040 							QMAN_CPDMA_DST_OFFSET);
3041 	} else {
3042 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044 				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045 
3046 		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047 							QMAN_LDMA_SIZE_OFFSET);
3048 		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049 							QMAN_LDMA_SRC_OFFSET);
3050 		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051 							QMAN_LDMA_DST_OFFSET);
3052 
3053 		/* Configure RAZWI IRQ */
3054 		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055 		if (hdev->stop_on_err)
3056 			tpc_qm_err_cfg |=
3057 				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058 
3059 		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060 
3061 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062 			lower_32_bits(CFG_BASE + irq_handler_offset));
3063 		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064 			upper_32_bits(CFG_BASE + irq_handler_offset));
3065 
3066 		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067 			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068 									tpc_id);
3069 
3070 		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071 				QM_ARB_ERR_MSG_EN_MASK);
3072 
3073 		/* Set timeout to maximum */
3074 		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3075 
3076 		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3077 		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3078 				QMAN_INTERNAL_MAKE_TRUSTED);
3079 	}
3080 
3081 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3082 	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3083 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3084 	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3085 
3086 	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3087 	if (tpc_id == 6) {
3088 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3089 				mtr_base_ws_lo);
3090 		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3091 				mtr_base_ws_hi);
3092 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3093 				so_base_ws_lo);
3094 		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3095 				so_base_ws_hi);
3096 	}
3097 }
3098 
3099 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3100 {
3101 	struct gaudi_device *gaudi = hdev->asic_specific;
3102 	struct gaudi_internal_qman_info *q;
3103 	u64 qman_base_addr;
3104 	u32 so_base_hi, tpc_offset = 0;
3105 	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3106 			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3107 	int i, tpc_id, internal_q_index;
3108 
3109 	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3110 		return;
3111 
3112 	so_base_hi = upper_32_bits(CFG_BASE +
3113 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3114 
3115 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3116 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3117 			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3118 						tpc_id * QMAN_STREAMS + i;
3119 			q = &gaudi->internal_qmans[internal_q_index];
3120 			qman_base_addr = (u64) q->pq_dma_addr;
3121 			gaudi_init_tpc_qman(hdev, tpc_offset, i,
3122 						qman_base_addr);
3123 
3124 			if (i == 3) {
3125 				/* Initializing lower CP for TPC QMAN */
3126 				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3127 
3128 				/* Enable the QMAN and TPC channel */
3129 				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3130 						QMAN_TPC_ENABLE);
3131 			}
3132 		}
3133 
3134 		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3135 				so_base_hi);
3136 
3137 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3138 
3139 		gaudi->hw_cap_initialized |=
3140 				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3141 	}
3142 }
3143 
3144 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3145 				int qman_id, u64 qman_base_addr, int nic_id)
3146 {
3147 	struct cpu_dyn_regs *dyn_regs =
3148 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3149 	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3150 	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3151 	u32 nic_qm_err_cfg, irq_handler_offset;
3152 	u32 q_off;
3153 
3154 	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3155 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3156 	mtr_base_en_hi = upper_32_bits(CFG_BASE +
3157 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3158 	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3159 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3160 	so_base_en_hi = upper_32_bits(CFG_BASE +
3161 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3162 	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3163 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3164 	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3165 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3166 	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3167 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3168 	so_base_ws_hi = upper_32_bits(CFG_BASE +
3169 				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3170 
3171 	q_off = nic_offset + qman_id * 4;
3172 
3173 	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3174 	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3175 
3176 	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3177 	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3178 	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3179 
3180 	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3181 							QMAN_LDMA_SIZE_OFFSET);
3182 	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3183 							QMAN_LDMA_SRC_OFFSET);
3184 	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3185 							QMAN_LDMA_DST_OFFSET);
3186 
3187 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3188 	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3189 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3190 	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3191 
3192 	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3193 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3194 	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3195 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3196 	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3197 
3198 	if (qman_id == 0) {
3199 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3200 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3201 				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3202 
3203 		/* Configure RAZWI IRQ */
3204 		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3205 		if (hdev->stop_on_err)
3206 			nic_qm_err_cfg |=
3207 				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3208 
3209 		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3210 
3211 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3212 			lower_32_bits(CFG_BASE + irq_handler_offset));
3213 		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3214 			upper_32_bits(CFG_BASE + irq_handler_offset));
3215 
3216 		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3217 			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3218 									nic_id);
3219 
3220 		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3221 				QM_ARB_ERR_MSG_EN_MASK);
3222 
3223 		/* Set timeout to maximum */
3224 		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3225 
3226 		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3227 		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3228 				QMAN_INTERNAL_MAKE_TRUSTED);
3229 	}
3230 }
3231 
3232 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3233 {
3234 	struct gaudi_device *gaudi = hdev->asic_specific;
3235 	struct gaudi_internal_qman_info *q;
3236 	u64 qman_base_addr;
3237 	u32 nic_offset = 0;
3238 	u32 nic_delta_between_qmans =
3239 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3240 	u32 nic_delta_between_nics =
3241 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242 	int i, nic_id, internal_q_index;
3243 
3244 	if (!hdev->nic_ports_mask)
3245 		return;
3246 
3247 	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3248 		return;
3249 
3250 	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3251 
3252 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3253 		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3254 			nic_offset += nic_delta_between_qmans;
3255 			if (nic_id & 1) {
3256 				nic_offset -= (nic_delta_between_qmans * 2);
3257 				nic_offset += nic_delta_between_nics;
3258 			}
3259 			continue;
3260 		}
3261 
3262 		for (i = 0 ; i < QMAN_STREAMS ; i++) {
3263 			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3264 						nic_id * QMAN_STREAMS + i;
3265 			q = &gaudi->internal_qmans[internal_q_index];
3266 			qman_base_addr = (u64) q->pq_dma_addr;
3267 			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3268 						qman_base_addr, nic_id);
3269 		}
3270 
3271 		/* Enable the QMAN */
3272 		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3273 
3274 		nic_offset += nic_delta_between_qmans;
3275 		if (nic_id & 1) {
3276 			nic_offset -= (nic_delta_between_qmans * 2);
3277 			nic_offset += nic_delta_between_nics;
3278 		}
3279 
3280 		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3281 	}
3282 }
3283 
3284 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3285 {
3286 	struct gaudi_device *gaudi = hdev->asic_specific;
3287 
3288 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3289 		return;
3290 
3291 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3292 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3293 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3294 }
3295 
3296 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3297 {
3298 	struct gaudi_device *gaudi = hdev->asic_specific;
3299 
3300 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3301 		return;
3302 
3303 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3304 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3305 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3306 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3307 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3308 }
3309 
3310 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3311 {
3312 	struct gaudi_device *gaudi = hdev->asic_specific;
3313 
3314 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3315 		return;
3316 
3317 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3318 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3319 }
3320 
3321 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3322 {
3323 	struct gaudi_device *gaudi = hdev->asic_specific;
3324 	u32 tpc_offset = 0;
3325 	int tpc_id;
3326 
3327 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3328 		return;
3329 
3330 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3331 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3332 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3333 	}
3334 }
3335 
3336 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3337 {
3338 	struct gaudi_device *gaudi = hdev->asic_specific;
3339 	u32 nic_mask, nic_offset = 0;
3340 	u32 nic_delta_between_qmans =
3341 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3342 	u32 nic_delta_between_nics =
3343 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344 	int nic_id;
3345 
3346 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3347 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3348 
3349 		if (gaudi->hw_cap_initialized & nic_mask)
3350 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3351 
3352 		nic_offset += nic_delta_between_qmans;
3353 		if (nic_id & 1) {
3354 			nic_offset -= (nic_delta_between_qmans * 2);
3355 			nic_offset += nic_delta_between_nics;
3356 		}
3357 	}
3358 }
3359 
3360 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3361 {
3362 	struct gaudi_device *gaudi = hdev->asic_specific;
3363 
3364 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3365 		return;
3366 
3367 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3368 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371 }
3372 
3373 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3374 {
3375 	struct gaudi_device *gaudi = hdev->asic_specific;
3376 
3377 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3378 		return;
3379 
3380 	/* Stop CPs of HBM DMA QMANs */
3381 
3382 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 }
3388 
3389 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3390 {
3391 	struct gaudi_device *gaudi = hdev->asic_specific;
3392 
3393 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3394 		return;
3395 
3396 	/* Stop CPs of MME QMANs */
3397 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3399 }
3400 
3401 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3402 {
3403 	struct gaudi_device *gaudi = hdev->asic_specific;
3404 
3405 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3406 		return;
3407 
3408 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 }
3417 
3418 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3419 {
3420 	struct gaudi_device *gaudi = hdev->asic_specific;
3421 
3422 	/* Stop upper CPs of QMANs */
3423 
3424 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3425 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3426 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3427 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3428 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3429 
3430 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3431 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3432 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3433 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3434 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3435 
3436 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3437 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3438 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3439 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3440 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3441 
3442 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3443 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3444 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3445 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3446 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3447 
3448 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3449 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3450 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3451 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3452 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3453 
3454 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3455 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3456 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3457 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3458 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3459 
3460 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3461 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3462 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3463 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3464 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3465 
3466 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3467 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3468 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3469 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3470 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3471 
3472 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3473 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3474 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3475 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3476 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3477 
3478 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3479 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3480 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3481 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3482 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3483 }
3484 
3485 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3486 {
3487 	struct gaudi_device *gaudi = hdev->asic_specific;
3488 
3489 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3490 		return;
3491 
3492 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495 }
3496 
3497 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3498 {
3499 	struct gaudi_device *gaudi = hdev->asic_specific;
3500 
3501 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3502 		return;
3503 
3504 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 }
3510 
3511 static void gaudi_mme_stall(struct hl_device *hdev)
3512 {
3513 	struct gaudi_device *gaudi = hdev->asic_specific;
3514 
3515 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3516 		return;
3517 
3518 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3519 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3521 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3523 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535 }
3536 
3537 static void gaudi_tpc_stall(struct hl_device *hdev)
3538 {
3539 	struct gaudi_device *gaudi = hdev->asic_specific;
3540 
3541 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3542 		return;
3543 
3544 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 }
3553 
3554 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3555 {
3556 	u32 qman_offset;
3557 	int i;
3558 
3559 	if (hdev->asic_prop.fw_security_enabled)
3560 		return;
3561 
3562 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3563 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3564 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3565 
3566 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3567 	}
3568 
3569 	WREG32(mmMME0_QM_CGM_CFG, 0);
3570 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3571 	WREG32(mmMME2_QM_CGM_CFG, 0);
3572 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3573 
3574 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3575 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3576 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3577 
3578 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3579 	}
3580 }
3581 
3582 static void gaudi_enable_timestamp(struct hl_device *hdev)
3583 {
3584 	/* Disable the timestamp counter */
3585 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3586 
3587 	/* Zero the lower/upper parts of the 64-bit counter */
3588 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3589 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3590 
3591 	/* Enable the counter */
3592 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3593 }
3594 
3595 static void gaudi_disable_timestamp(struct hl_device *hdev)
3596 {
3597 	/* Disable the timestamp counter */
3598 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3599 }
3600 
3601 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3602 {
3603 	u32 wait_timeout_ms;
3604 
3605 	if (hdev->pldm)
3606 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3607 	else
3608 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3609 
3610 	if (fw_reset)
3611 		goto skip_engines;
3612 
3613 	gaudi_stop_nic_qmans(hdev);
3614 	gaudi_stop_mme_qmans(hdev);
3615 	gaudi_stop_tpc_qmans(hdev);
3616 	gaudi_stop_hbm_dma_qmans(hdev);
3617 	gaudi_stop_pci_dma_qmans(hdev);
3618 
3619 	msleep(wait_timeout_ms);
3620 
3621 	gaudi_pci_dma_stall(hdev);
3622 	gaudi_hbm_dma_stall(hdev);
3623 	gaudi_tpc_stall(hdev);
3624 	gaudi_mme_stall(hdev);
3625 
3626 	msleep(wait_timeout_ms);
3627 
3628 	gaudi_disable_nic_qmans(hdev);
3629 	gaudi_disable_mme_qmans(hdev);
3630 	gaudi_disable_tpc_qmans(hdev);
3631 	gaudi_disable_hbm_dma_qmans(hdev);
3632 	gaudi_disable_pci_dma_qmans(hdev);
3633 
3634 	gaudi_disable_timestamp(hdev);
3635 
3636 skip_engines:
3637 	gaudi_disable_msi(hdev);
3638 }
3639 
3640 static int gaudi_mmu_init(struct hl_device *hdev)
3641 {
3642 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3643 	struct gaudi_device *gaudi = hdev->asic_specific;
3644 	u64 hop0_addr;
3645 	int rc, i;
3646 
3647 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3648 		return 0;
3649 
3650 	for (i = 0 ; i < prop->max_asid ; i++) {
3651 		hop0_addr = prop->mmu_pgt_addr +
3652 				(i * prop->dmmu.hop_table_size);
3653 
3654 		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3655 		if (rc) {
3656 			dev_err(hdev->dev,
3657 				"failed to set hop0 addr for asid %d\n", i);
3658 			return rc;
3659 		}
3660 	}
3661 
3662 	/* init MMU cache manage page */
3663 	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3664 	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3665 
3666 	/* mem cache invalidation */
3667 	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3668 
3669 	rc = hl_mmu_invalidate_cache(hdev, true, 0);
3670 	if (rc)
3671 		return rc;
3672 
3673 	WREG32(mmMMU_UP_MMU_ENABLE, 1);
3674 	WREG32(mmMMU_UP_SPI_MASK, 0xF);
3675 
3676 	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3677 
3678 	/*
3679 	 * The H/W expects the first PI after init to be 1. After wraparound
3680 	 * we'll write 0.
3681 	 */
3682 	gaudi->mmu_cache_inv_pi = 1;
3683 
3684 	gaudi->hw_cap_initialized |= HW_CAP_MMU;
3685 
3686 	return 0;
3687 }
3688 
3689 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3690 {
3691 	void __iomem *dst;
3692 
3693 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3694 
3695 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3696 }
3697 
3698 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3699 {
3700 	void __iomem *dst;
3701 
3702 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3703 
3704 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3705 }
3706 
3707 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3708 {
3709 	struct dynamic_fw_load_mgr *dynamic_loader;
3710 	struct cpu_dyn_regs *dyn_regs;
3711 
3712 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3713 
3714 	/*
3715 	 * here we update initial values for few specific dynamic regs (as
3716 	 * before reading the first descriptor from FW those value has to be
3717 	 * hard-coded) in later stages of the protocol those values will be
3718 	 * updated automatically by reading the FW descriptor so data there
3719 	 * will always be up-to-date
3720 	 */
3721 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3722 	dyn_regs->kmd_msg_to_cpu =
3723 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3724 	dyn_regs->cpu_cmd_status_to_host =
3725 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3726 
3727 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3728 }
3729 
3730 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3731 {
3732 	struct static_fw_load_mgr *static_loader;
3733 
3734 	static_loader = &hdev->fw_loader.static_loader;
3735 
3736 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3738 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3739 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3740 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3741 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3742 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3743 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3744 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3745 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3746 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3747 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3748 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3749 			GAUDI_PLDM_RESET_WAIT_MSEC :
3750 			GAUDI_CPU_RESET_WAIT_MSEC;
3751 }
3752 
3753 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3754 {
3755 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3756 
3757 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3758 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3759 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3760 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3761 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3762 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3763 }
3764 
3765 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3766 {
3767 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3768 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3769 
3770 	/* fill common fields */
3771 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3772 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3773 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3774 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3775 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3776 	fw_loader->skip_bmc = !hdev->bmc_enable;
3777 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3778 	fw_loader->dram_bar_id = HBM_BAR_ID;
3779 
3780 	if (prop->dynamic_fw_load)
3781 		gaudi_init_dynamic_firmware_loader(hdev);
3782 	else
3783 		gaudi_init_static_firmware_loader(hdev);
3784 }
3785 
3786 static int gaudi_init_cpu(struct hl_device *hdev)
3787 {
3788 	struct gaudi_device *gaudi = hdev->asic_specific;
3789 	int rc;
3790 
3791 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3792 		return 0;
3793 
3794 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3795 		return 0;
3796 
3797 	/*
3798 	 * The device CPU works with 40 bits addresses.
3799 	 * This register sets the extension to 50 bits.
3800 	 */
3801 	if (!hdev->asic_prop.fw_security_enabled)
3802 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3803 
3804 	rc = hl_fw_init_cpu(hdev);
3805 
3806 	if (rc)
3807 		return rc;
3808 
3809 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3810 
3811 	return 0;
3812 }
3813 
3814 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3815 {
3816 	struct cpu_dyn_regs *dyn_regs =
3817 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3818 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3819 	struct gaudi_device *gaudi = hdev->asic_specific;
3820 	u32 status, irq_handler_offset;
3821 	struct hl_eq *eq;
3822 	struct hl_hw_queue *cpu_pq =
3823 			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3824 	int err;
3825 
3826 	if (!hdev->cpu_queues_enable)
3827 		return 0;
3828 
3829 	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3830 		return 0;
3831 
3832 	eq = &hdev->event_queue;
3833 
3834 	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3835 	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3836 
3837 	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3838 	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3839 
3840 	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3841 			lower_32_bits(hdev->cpu_accessible_dma_address));
3842 	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3843 			upper_32_bits(hdev->cpu_accessible_dma_address));
3844 
3845 	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3846 	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3847 	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3848 
3849 	/* Used for EQ CI */
3850 	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3851 
3852 	WREG32(mmCPU_IF_PF_PQ_PI, 0);
3853 
3854 	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3855 
3856 	irq_handler_offset = prop->gic_interrupts_enable ?
3857 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3858 			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3859 
3860 	WREG32(irq_handler_offset,
3861 		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3862 
3863 	err = hl_poll_timeout(
3864 		hdev,
3865 		mmCPU_IF_QUEUE_INIT,
3866 		status,
3867 		(status == PQ_INIT_STATUS_READY_FOR_HOST),
3868 		1000,
3869 		cpu_timeout);
3870 
3871 	if (err) {
3872 		dev_err(hdev->dev,
3873 			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
3874 		return -EIO;
3875 	}
3876 
3877 	/* update FW application security bits */
3878 	if (prop->fw_cpu_boot_dev_sts0_valid)
3879 		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3880 	if (prop->fw_cpu_boot_dev_sts1_valid)
3881 		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3882 
3883 	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3884 	return 0;
3885 }
3886 
3887 static void gaudi_pre_hw_init(struct hl_device *hdev)
3888 {
3889 	/* Perform read from the device to make sure device is up */
3890 	RREG32(mmHW_STATE);
3891 
3892 	if (!hdev->asic_prop.fw_security_enabled) {
3893 		/* Set the access through PCI bars (Linux driver only) as
3894 		 * secured
3895 		 */
3896 		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3897 				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3898 				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3899 
3900 		/* Perform read to flush the waiting writes to ensure
3901 		 * configuration was set in the device
3902 		 */
3903 		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3904 	}
3905 
3906 	/*
3907 	 * Let's mark in the H/W that we have reached this point. We check
3908 	 * this value in the reset_before_init function to understand whether
3909 	 * we need to reset the chip before doing H/W init. This register is
3910 	 * cleared by the H/W upon H/W reset
3911 	 */
3912 	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3913 }
3914 
3915 static int gaudi_hw_init(struct hl_device *hdev)
3916 {
3917 	struct gaudi_device *gaudi = hdev->asic_specific;
3918 	int rc;
3919 
3920 	gaudi_pre_hw_init(hdev);
3921 
3922 	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3923 	 * So we set it here and if anyone tries to move it later to
3924 	 * a different address, there will be an error
3925 	 */
3926 	if (hdev->asic_prop.iatu_done_by_fw)
3927 		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3928 
3929 	/*
3930 	 * Before pushing u-boot/linux to device, need to set the hbm bar to
3931 	 * base address of dram
3932 	 */
3933 	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3934 		dev_err(hdev->dev,
3935 			"failed to map HBM bar to DRAM base address\n");
3936 		return -EIO;
3937 	}
3938 
3939 	rc = gaudi_init_cpu(hdev);
3940 	if (rc) {
3941 		dev_err(hdev->dev, "failed to initialize CPU\n");
3942 		return rc;
3943 	}
3944 
3945 	/* In case the clock gating was enabled in preboot we need to disable
3946 	 * it here before touching the MME/TPC registers.
3947 	 */
3948 	gaudi_disable_clock_gating(hdev);
3949 
3950 	/* SRAM scrambler must be initialized after CPU is running from HBM */
3951 	gaudi_init_scrambler_sram(hdev);
3952 
3953 	/* This is here just in case we are working without CPU */
3954 	gaudi_init_scrambler_hbm(hdev);
3955 
3956 	gaudi_init_golden_registers(hdev);
3957 
3958 	rc = gaudi_mmu_init(hdev);
3959 	if (rc)
3960 		return rc;
3961 
3962 	gaudi_init_security(hdev);
3963 
3964 	gaudi_init_pci_dma_qmans(hdev);
3965 
3966 	gaudi_init_hbm_dma_qmans(hdev);
3967 
3968 	gaudi_init_mme_qmans(hdev);
3969 
3970 	gaudi_init_tpc_qmans(hdev);
3971 
3972 	gaudi_init_nic_qmans(hdev);
3973 
3974 	gaudi_enable_timestamp(hdev);
3975 
3976 	/* MSI must be enabled before CPU queues and NIC are initialized */
3977 	rc = gaudi_enable_msi(hdev);
3978 	if (rc)
3979 		goto disable_queues;
3980 
3981 	/* must be called after MSI was enabled */
3982 	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3983 	if (rc) {
3984 		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3985 			rc);
3986 		goto disable_msi;
3987 	}
3988 
3989 	/* Perform read from the device to flush all configuration */
3990 	RREG32(mmHW_STATE);
3991 
3992 	return 0;
3993 
3994 disable_msi:
3995 	gaudi_disable_msi(hdev);
3996 disable_queues:
3997 	gaudi_disable_mme_qmans(hdev);
3998 	gaudi_disable_pci_dma_qmans(hdev);
3999 
4000 	return rc;
4001 }
4002 
4003 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4004 {
4005 	struct cpu_dyn_regs *dyn_regs =
4006 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4007 	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4008 	struct gaudi_device *gaudi = hdev->asic_specific;
4009 	bool driver_performs_reset;
4010 
4011 	if (!hard_reset) {
4012 		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4013 		return 0;
4014 	}
4015 
4016 	if (hdev->pldm) {
4017 		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4018 		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4019 	} else {
4020 		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4021 		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4022 	}
4023 
4024 	if (fw_reset) {
4025 		dev_dbg(hdev->dev,
4026 			"Firmware performs HARD reset, going to wait %dms\n",
4027 			reset_timeout_ms);
4028 
4029 		goto skip_reset;
4030 	}
4031 
4032 	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4033 					!hdev->asic_prop.hard_reset_done_by_fw);
4034 
4035 	/* Set device to handle FLR by H/W as we will put the device CPU to
4036 	 * halt mode
4037 	 */
4038 	if (driver_performs_reset)
4039 		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4040 					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4041 
4042 	/* If linux is loaded in the device CPU we need to communicate with it
4043 	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4044 	 * registers in case of old F/Ws
4045 	 */
4046 	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4047 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4048 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4049 				le32_to_cpu(dyn_regs->gic_host_halt_irq);
4050 
4051 		WREG32(irq_handler_offset,
4052 			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4053 
4054 		/* This is a hail-mary attempt to revive the card in the small chance that the
4055 		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4056 		 * In that case, triggering reset through GIC won't help. We need to trigger the
4057 		 * reset as if Linux wasn't loaded.
4058 		 *
4059 		 * We do it only if the reset cause was HB, because that would be the indication
4060 		 * of such an event.
4061 		 *
4062 		 * In case watchdog hasn't expired but we still got HB, then this won't do any
4063 		 * damage.
4064 		 */
4065 		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4066 			if (hdev->asic_prop.hard_reset_done_by_fw)
4067 				hl_fw_ask_hard_reset_without_linux(hdev);
4068 			else
4069 				hl_fw_ask_halt_machine_without_linux(hdev);
4070 		}
4071 	} else {
4072 		if (hdev->asic_prop.hard_reset_done_by_fw)
4073 			hl_fw_ask_hard_reset_without_linux(hdev);
4074 		else
4075 			hl_fw_ask_halt_machine_without_linux(hdev);
4076 	}
4077 
4078 	if (driver_performs_reset) {
4079 
4080 		/* Configure the reset registers. Must be done as early as
4081 		 * possible in case we fail during H/W initialization
4082 		 */
4083 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4084 						(CFG_RST_H_DMA_MASK |
4085 						CFG_RST_H_MME_MASK |
4086 						CFG_RST_H_SM_MASK |
4087 						CFG_RST_H_TPC_7_MASK));
4088 
4089 		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4090 
4091 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4092 						(CFG_RST_H_HBM_MASK |
4093 						CFG_RST_H_TPC_7_MASK |
4094 						CFG_RST_H_NIC_MASK |
4095 						CFG_RST_H_SM_MASK |
4096 						CFG_RST_H_DMA_MASK |
4097 						CFG_RST_H_MME_MASK |
4098 						CFG_RST_H_CPU_MASK |
4099 						CFG_RST_H_MMU_MASK));
4100 
4101 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4102 						(CFG_RST_L_IF_MASK |
4103 						CFG_RST_L_PSOC_MASK |
4104 						CFG_RST_L_TPC_MASK));
4105 
4106 		msleep(cpu_timeout_ms);
4107 
4108 		/* Tell ASIC not to re-initialize PCIe */
4109 		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4110 
4111 		/* Restart BTL/BLR upon hard-reset */
4112 		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4113 
4114 		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4115 			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4116 
4117 		dev_dbg(hdev->dev,
4118 			"Issued HARD reset command, going to wait %dms\n",
4119 			reset_timeout_ms);
4120 	} else {
4121 		dev_dbg(hdev->dev,
4122 			"Firmware performs HARD reset, going to wait %dms\n",
4123 			reset_timeout_ms);
4124 	}
4125 
4126 skip_reset:
4127 	/*
4128 	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4129 	 * itself is in reset. Need to wait until the reset is deasserted
4130 	 */
4131 	msleep(reset_timeout_ms);
4132 
4133 	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4134 	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4135 		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4136 		return -ETIMEDOUT;
4137 	}
4138 
4139 	if (gaudi) {
4140 		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4141 						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4142 						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4143 						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4144 						HW_CAP_HBM_SCRAMBLER);
4145 
4146 		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4147 
4148 		hdev->device_cpu_is_halted = false;
4149 	}
4150 	return 0;
4151 }
4152 
4153 static int gaudi_suspend(struct hl_device *hdev)
4154 {
4155 	return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4156 }
4157 
/*
 * gaudi_resume() - re-run the iATU initialization.
 * @hdev: habanalabs device structure.
 *
 * Return: the value returned by gaudi_init_iatu().
 */
static int gaudi_resume(struct hl_device *hdev)
{
	int rc;

	rc = gaudi_init_iatu(hdev);

	return rc;
}
4162 
4163 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4164 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4165 {
4166 	int rc;
4167 
4168 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4169 			VM_DONTCOPY | VM_NORESERVE);
4170 
4171 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4172 				(dma_addr - HOST_PHYS_BASE), size);
4173 	if (rc)
4174 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4175 
4176 	return rc;
4177 }
4178 
4179 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4180 {
4181 	struct cpu_dyn_regs *dyn_regs =
4182 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4183 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4184 	struct gaudi_device *gaudi = hdev->asic_specific;
4185 	bool invalid_queue = false;
4186 	int dma_id;
4187 
4188 	switch (hw_queue_id) {
4189 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4190 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4191 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4192 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4193 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4194 		break;
4195 
4196 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4197 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4198 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4199 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4200 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4201 		break;
4202 
4203 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4204 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4205 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4206 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4207 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4208 		break;
4209 
4210 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4211 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4212 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4213 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4214 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4215 		break;
4216 
4217 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4218 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4219 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4220 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4221 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4222 		break;
4223 
4224 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4225 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4226 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4227 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4228 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4229 		break;
4230 
4231 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4232 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4233 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4234 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4235 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4236 		break;
4237 
4238 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4239 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4240 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4241 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4242 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4243 		break;
4244 
4245 	case GAUDI_QUEUE_ID_CPU_PQ:
4246 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4247 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4248 		else
4249 			invalid_queue = true;
4250 		break;
4251 
4252 	case GAUDI_QUEUE_ID_MME_0_0:
4253 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4254 		break;
4255 
4256 	case GAUDI_QUEUE_ID_MME_0_1:
4257 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4258 		break;
4259 
4260 	case GAUDI_QUEUE_ID_MME_0_2:
4261 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4262 		break;
4263 
4264 	case GAUDI_QUEUE_ID_MME_0_3:
4265 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4266 		break;
4267 
4268 	case GAUDI_QUEUE_ID_MME_1_0:
4269 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4270 		break;
4271 
4272 	case GAUDI_QUEUE_ID_MME_1_1:
4273 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4274 		break;
4275 
4276 	case GAUDI_QUEUE_ID_MME_1_2:
4277 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4278 		break;
4279 
4280 	case GAUDI_QUEUE_ID_MME_1_3:
4281 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4282 		break;
4283 
4284 	case GAUDI_QUEUE_ID_TPC_0_0:
4285 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4286 		break;
4287 
4288 	case GAUDI_QUEUE_ID_TPC_0_1:
4289 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4290 		break;
4291 
4292 	case GAUDI_QUEUE_ID_TPC_0_2:
4293 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4294 		break;
4295 
4296 	case GAUDI_QUEUE_ID_TPC_0_3:
4297 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4298 		break;
4299 
4300 	case GAUDI_QUEUE_ID_TPC_1_0:
4301 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4302 		break;
4303 
4304 	case GAUDI_QUEUE_ID_TPC_1_1:
4305 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4306 		break;
4307 
4308 	case GAUDI_QUEUE_ID_TPC_1_2:
4309 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4310 		break;
4311 
4312 	case GAUDI_QUEUE_ID_TPC_1_3:
4313 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4314 		break;
4315 
4316 	case GAUDI_QUEUE_ID_TPC_2_0:
4317 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4318 		break;
4319 
4320 	case GAUDI_QUEUE_ID_TPC_2_1:
4321 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4322 		break;
4323 
4324 	case GAUDI_QUEUE_ID_TPC_2_2:
4325 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4326 		break;
4327 
4328 	case GAUDI_QUEUE_ID_TPC_2_3:
4329 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4330 		break;
4331 
4332 	case GAUDI_QUEUE_ID_TPC_3_0:
4333 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4334 		break;
4335 
4336 	case GAUDI_QUEUE_ID_TPC_3_1:
4337 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4338 		break;
4339 
4340 	case GAUDI_QUEUE_ID_TPC_3_2:
4341 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4342 		break;
4343 
4344 	case GAUDI_QUEUE_ID_TPC_3_3:
4345 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4346 		break;
4347 
4348 	case GAUDI_QUEUE_ID_TPC_4_0:
4349 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4350 		break;
4351 
4352 	case GAUDI_QUEUE_ID_TPC_4_1:
4353 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4354 		break;
4355 
4356 	case GAUDI_QUEUE_ID_TPC_4_2:
4357 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4358 		break;
4359 
4360 	case GAUDI_QUEUE_ID_TPC_4_3:
4361 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4362 		break;
4363 
4364 	case GAUDI_QUEUE_ID_TPC_5_0:
4365 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4366 		break;
4367 
4368 	case GAUDI_QUEUE_ID_TPC_5_1:
4369 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4370 		break;
4371 
4372 	case GAUDI_QUEUE_ID_TPC_5_2:
4373 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4374 		break;
4375 
4376 	case GAUDI_QUEUE_ID_TPC_5_3:
4377 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4378 		break;
4379 
4380 	case GAUDI_QUEUE_ID_TPC_6_0:
4381 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4382 		break;
4383 
4384 	case GAUDI_QUEUE_ID_TPC_6_1:
4385 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4386 		break;
4387 
4388 	case GAUDI_QUEUE_ID_TPC_6_2:
4389 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4390 		break;
4391 
4392 	case GAUDI_QUEUE_ID_TPC_6_3:
4393 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4394 		break;
4395 
4396 	case GAUDI_QUEUE_ID_TPC_7_0:
4397 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4398 		break;
4399 
4400 	case GAUDI_QUEUE_ID_TPC_7_1:
4401 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4402 		break;
4403 
4404 	case GAUDI_QUEUE_ID_TPC_7_2:
4405 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4406 		break;
4407 
4408 	case GAUDI_QUEUE_ID_TPC_7_3:
4409 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4410 		break;
4411 
4412 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4413 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4414 			invalid_queue = true;
4415 
4416 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4417 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4418 		break;
4419 
4420 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4421 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4422 			invalid_queue = true;
4423 
4424 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4425 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4426 		break;
4427 
4428 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4429 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4430 			invalid_queue = true;
4431 
4432 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4433 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4434 		break;
4435 
4436 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4437 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4438 			invalid_queue = true;
4439 
4440 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4441 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4442 		break;
4443 
4444 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4445 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4446 			invalid_queue = true;
4447 
4448 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4449 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4450 		break;
4451 
4452 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4453 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4454 			invalid_queue = true;
4455 
4456 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4457 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4458 		break;
4459 
4460 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4461 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4462 			invalid_queue = true;
4463 
4464 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4465 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4466 		break;
4467 
4468 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4469 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4470 			invalid_queue = true;
4471 
4472 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4473 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4474 		break;
4475 
4476 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4477 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4478 			invalid_queue = true;
4479 
4480 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4481 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4482 		break;
4483 
4484 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4485 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4486 			invalid_queue = true;
4487 
4488 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4489 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4490 		break;
4491 
4492 	default:
4493 		invalid_queue = true;
4494 	}
4495 
4496 	if (invalid_queue) {
4497 		/* Should never get here */
4498 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4499 			hw_queue_id);
4500 		return;
4501 	}
4502 
4503 	db_value = pi;
4504 
4505 	/* ring the doorbell */
4506 	WREG32(db_reg_offset, db_value);
4507 
4508 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4509 		/* make sure device CPU will read latest data from host */
4510 		mb();
4511 
4512 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4513 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4514 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4515 
4516 		WREG32(irq_handler_offset,
4517 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4518 	}
4519 }
4520 
4521 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4522 				struct hl_bd *bd)
4523 {
4524 	__le64 *pbd = (__le64 *) bd;
4525 
4526 	/* The QMANs are on the host memory so a simple copy suffice */
4527 	pqe[0] = pbd[0];
4528 	pqe[1] = pbd[1];
4529 }
4530 
4531 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4532 					dma_addr_t *dma_handle, gfp_t flags)
4533 {
4534 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4535 						dma_handle, flags);
4536 
4537 	/* Shift to the device's base physical address of host memory */
4538 	if (kernel_addr)
4539 		*dma_handle += HOST_PHYS_BASE;
4540 
4541 	return kernel_addr;
4542 }
4543 
4544 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4545 		void *cpu_addr, dma_addr_t dma_handle)
4546 {
4547 	/* Cancel the device's base physical address of host memory */
4548 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4549 
4550 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4551 }
4552 
4553 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4554 {
4555 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4556 	u64 cur_addr = prop->dram_user_base_address;
4557 	u32 chunk_size, busy;
4558 	int rc, dma_id;
4559 
4560 	while (cur_addr < prop->dram_end_address) {
4561 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4562 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4563 
4564 			chunk_size =
4565 			min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4566 
4567 			dev_dbg(hdev->dev,
4568 				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4569 				cur_addr, cur_addr + chunk_size);
4570 
4571 			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4572 					lower_32_bits(val));
4573 			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4574 					upper_32_bits(val));
4575 			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4576 						lower_32_bits(cur_addr));
4577 			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4578 						upper_32_bits(cur_addr));
4579 			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4580 					chunk_size);
4581 			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4582 					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4583 					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4584 
4585 			cur_addr += chunk_size;
4586 
4587 			if (cur_addr == prop->dram_end_address)
4588 				break;
4589 		}
4590 
4591 		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4592 			u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4593 
4594 			rc = hl_poll_timeout(
4595 				hdev,
4596 				mmDMA0_CORE_STS0 + dma_offset,
4597 				busy,
4598 				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4599 				1000,
4600 				HBM_SCRUBBING_TIMEOUT_US);
4601 
4602 			if (rc) {
4603 				dev_err(hdev->dev,
4604 					"DMA Timeout during HBM scrubbing of DMA #%d\n",
4605 					dma_id);
4606 				return -EIO;
4607 			}
4608 		}
4609 	}
4610 
4611 	return 0;
4612 }
4613 
4614 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4615 {
4616 	struct asic_fixed_properties *prop = &hdev->asic_prop;
4617 	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4618 	u64 addr, size, val = hdev->memory_scrub_val;
4619 	ktime_t timeout;
4620 	int rc = 0;
4621 
4622 	if (!hdev->memory_scrub)
4623 		return 0;
4624 
4625 	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4626 	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4627 		if (ktime_compare(ktime_get(), timeout) > 0) {
4628 			dev_err(hdev->dev, "waiting for idle timeout\n");
4629 			return -ETIMEDOUT;
4630 		}
4631 		usleep_range((1000 >> 2) + 1, 1000);
4632 	}
4633 
4634 	/* Scrub SRAM */
4635 	addr = prop->sram_user_base_address;
4636 	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4637 
4638 	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4639 			addr, addr + size, val);
4640 	rc = gaudi_memset_device_memory(hdev, addr, size, val);
4641 	if (rc) {
4642 		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4643 		return rc;
4644 	}
4645 
4646 	/* Scrub HBM using all DMA channels in parallel */
4647 	rc = gaudi_scrub_device_dram(hdev, val);
4648 	if (rc) {
4649 		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4650 		return rc;
4651 	}
4652 
4653 	return 0;
4654 }
4655 
4656 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4657 				u32 queue_id, dma_addr_t *dma_handle,
4658 				u16 *queue_len)
4659 {
4660 	struct gaudi_device *gaudi = hdev->asic_specific;
4661 	struct gaudi_internal_qman_info *q;
4662 
4663 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4664 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4665 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4666 		return NULL;
4667 	}
4668 
4669 	q = &gaudi->internal_qmans[queue_id];
4670 	*dma_handle = q->pq_dma_addr;
4671 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4672 
4673 	return q->pq_kernel_addr;
4674 }
4675 
4676 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4677 				u16 len, u32 timeout, u64 *result)
4678 {
4679 	struct gaudi_device *gaudi = hdev->asic_specific;
4680 
4681 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4682 		if (result)
4683 			*result = 0;
4684 		return 0;
4685 	}
4686 
4687 	if (!timeout)
4688 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4689 
4690 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4691 						timeout, result);
4692 }
4693 
4694 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4695 {
4696 	struct packet_msg_prot *fence_pkt;
4697 	dma_addr_t pkt_dma_addr;
4698 	u32 fence_val, tmp, timeout_usec;
4699 	dma_addr_t fence_dma_addr;
4700 	u32 *fence_ptr;
4701 	int rc;
4702 
4703 	if (hdev->pldm)
4704 		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4705 	else
4706 		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4707 
4708 	fence_val = GAUDI_QMAN0_FENCE_VAL;
4709 
4710 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4711 	if (!fence_ptr) {
4712 		dev_err(hdev->dev,
4713 			"Failed to allocate memory for H/W queue %d testing\n",
4714 			hw_queue_id);
4715 		return -ENOMEM;
4716 	}
4717 
4718 	*fence_ptr = 0;
4719 
4720 	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4721 						&pkt_dma_addr);
4722 	if (!fence_pkt) {
4723 		dev_err(hdev->dev,
4724 			"Failed to allocate packet for H/W queue %d testing\n",
4725 			hw_queue_id);
4726 		rc = -ENOMEM;
4727 		goto free_fence_ptr;
4728 	}
4729 
4730 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4731 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4732 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4733 
4734 	fence_pkt->ctl = cpu_to_le32(tmp);
4735 	fence_pkt->value = cpu_to_le32(fence_val);
4736 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4737 
4738 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4739 					sizeof(struct packet_msg_prot),
4740 					pkt_dma_addr);
4741 	if (rc) {
4742 		dev_err(hdev->dev,
4743 			"Failed to send fence packet to H/W queue %d\n",
4744 			hw_queue_id);
4745 		goto free_pkt;
4746 	}
4747 
4748 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4749 					1000, timeout_usec, true);
4750 
4751 	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4752 
4753 	if (rc == -ETIMEDOUT) {
4754 		dev_err(hdev->dev,
4755 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4756 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4757 		rc = -EIO;
4758 	}
4759 
4760 free_pkt:
4761 	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4762 free_fence_ptr:
4763 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4764 	return rc;
4765 }
4766 
4767 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4768 {
4769 	struct gaudi_device *gaudi = hdev->asic_specific;
4770 
4771 	/*
4772 	 * check capability here as send_cpu_message() won't update the result
4773 	 * value if no capability
4774 	 */
4775 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4776 		return 0;
4777 
4778 	return hl_fw_test_cpu_queue(hdev);
4779 }
4780 
4781 static int gaudi_test_queues(struct hl_device *hdev)
4782 {
4783 	int i, rc, ret_val = 0;
4784 
4785 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4786 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4787 			rc = gaudi_test_queue(hdev, i);
4788 			if (rc)
4789 				ret_val = -EINVAL;
4790 		}
4791 	}
4792 
4793 	rc = gaudi_test_cpu_queue(hdev);
4794 	if (rc)
4795 		ret_val = -EINVAL;
4796 
4797 	return ret_val;
4798 }
4799 
4800 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4801 		gfp_t mem_flags, dma_addr_t *dma_handle)
4802 {
4803 	void *kernel_addr;
4804 
4805 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4806 		return NULL;
4807 
4808 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4809 
4810 	/* Shift to the device's base physical address of host memory */
4811 	if (kernel_addr)
4812 		*dma_handle += HOST_PHYS_BASE;
4813 
4814 	return kernel_addr;
4815 }
4816 
4817 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4818 			dma_addr_t dma_addr)
4819 {
4820 	/* Cancel the device's base physical address of host memory */
4821 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4822 
4823 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4824 }
4825 
4826 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4827 					size_t size, dma_addr_t *dma_handle)
4828 {
4829 	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4830 }
4831 
4832 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4833 						size_t size, void *vaddr)
4834 {
4835 	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4836 }
4837 
4838 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4839 {
4840 	struct scatterlist *sg, *sg_next_iter;
4841 	u32 count, dma_desc_cnt;
4842 	u64 len, len_next;
4843 	dma_addr_t addr, addr_next;
4844 
4845 	dma_desc_cnt = 0;
4846 
4847 	for_each_sgtable_dma_sg(sgt, sg, count) {
4848 		len = sg_dma_len(sg);
4849 		addr = sg_dma_address(sg);
4850 
4851 		if (len == 0)
4852 			break;
4853 
4854 		while ((count + 1) < sgt->nents) {
4855 			sg_next_iter = sg_next(sg);
4856 			len_next = sg_dma_len(sg_next_iter);
4857 			addr_next = sg_dma_address(sg_next_iter);
4858 
4859 			if (len_next == 0)
4860 				break;
4861 
4862 			if ((addr + len == addr_next) &&
4863 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4864 				len += len_next;
4865 				count++;
4866 				sg = sg_next_iter;
4867 			} else {
4868 				break;
4869 			}
4870 		}
4871 
4872 		dma_desc_cnt++;
4873 	}
4874 
4875 	return dma_desc_cnt * sizeof(struct packet_lin_dma);
4876 }
4877 
4878 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4879 				struct hl_cs_parser *parser,
4880 				struct packet_lin_dma *user_dma_pkt,
4881 				u64 addr, enum dma_data_direction dir)
4882 {
4883 	struct hl_userptr *userptr;
4884 	int rc;
4885 
4886 	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4887 			parser->job_userptr_list, &userptr))
4888 		goto already_pinned;
4889 
4890 	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4891 	if (!userptr)
4892 		return -ENOMEM;
4893 
4894 	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4895 				userptr);
4896 	if (rc)
4897 		goto free_userptr;
4898 
4899 	list_add_tail(&userptr->job_node, parser->job_userptr_list);
4900 
4901 	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4902 	if (rc) {
4903 		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4904 		goto unpin_memory;
4905 	}
4906 
4907 	userptr->dma_mapped = true;
4908 	userptr->dir = dir;
4909 
4910 already_pinned:
4911 	parser->patched_cb_size +=
4912 			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4913 
4914 	return 0;
4915 
4916 unpin_memory:
4917 	list_del(&userptr->job_node);
4918 	hl_unpin_host_memory(hdev, userptr);
4919 free_userptr:
4920 	kfree(userptr);
4921 	return rc;
4922 }
4923 
4924 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4925 				struct hl_cs_parser *parser,
4926 				struct packet_lin_dma *user_dma_pkt,
4927 				bool src_in_host)
4928 {
4929 	enum dma_data_direction dir;
4930 	bool skip_host_mem_pin = false, user_memset;
4931 	u64 addr;
4932 	int rc = 0;
4933 
4934 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4935 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4936 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4937 
4938 	if (src_in_host) {
4939 		if (user_memset)
4940 			skip_host_mem_pin = true;
4941 
4942 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4943 		dir = DMA_TO_DEVICE;
4944 		addr = le64_to_cpu(user_dma_pkt->src_addr);
4945 	} else {
4946 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4947 		dir = DMA_FROM_DEVICE;
4948 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4949 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4950 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4951 	}
4952 
4953 	if (skip_host_mem_pin)
4954 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4955 	else
4956 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4957 						addr, dir);
4958 
4959 	return rc;
4960 }
4961 
4962 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4963 				struct hl_cs_parser *parser,
4964 				struct packet_lin_dma *user_dma_pkt)
4965 {
4966 	bool src_in_host = false;
4967 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4968 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4969 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4970 
4971 	dev_dbg(hdev->dev, "DMA packet details:\n");
4972 	dev_dbg(hdev->dev, "source == 0x%llx\n",
4973 				le64_to_cpu(user_dma_pkt->src_addr));
4974 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4975 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4976 
4977 	/*
4978 	 * Special handling for DMA with size 0. Bypass all validations
4979 	 * because no transactions will be done except for WR_COMP, which
4980 	 * is not a security issue
4981 	 */
4982 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
4983 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4984 		return 0;
4985 	}
4986 
4987 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4988 		src_in_host = true;
4989 
4990 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4991 						src_in_host);
4992 }
4993 
4994 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4995 					struct hl_cs_parser *parser,
4996 					struct packet_load_and_exe *user_pkt)
4997 {
4998 	u32 cfg;
4999 
5000 	cfg = le32_to_cpu(user_pkt->cfg);
5001 
5002 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5003 		dev_err(hdev->dev,
5004 			"User not allowed to use Load and Execute\n");
5005 		return -EPERM;
5006 	}
5007 
5008 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5009 
5010 	return 0;
5011 }
5012 
5013 static int gaudi_validate_cb(struct hl_device *hdev,
5014 			struct hl_cs_parser *parser, bool is_mmu)
5015 {
5016 	u32 cb_parsed_length = 0;
5017 	int rc = 0;
5018 
5019 	parser->patched_cb_size = 0;
5020 
5021 	/* cb_user_size is more than 0 so loop will always be executed */
5022 	while (cb_parsed_length < parser->user_cb_size) {
5023 		enum packet_id pkt_id;
5024 		u16 pkt_size;
5025 		struct gaudi_packet *user_pkt;
5026 
5027 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5028 
5029 		pkt_id = (enum packet_id) (
5030 				(le64_to_cpu(user_pkt->header) &
5031 				PACKET_HEADER_PACKET_ID_MASK) >>
5032 					PACKET_HEADER_PACKET_ID_SHIFT);
5033 
5034 		if (!validate_packet_id(pkt_id)) {
5035 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5036 			rc = -EINVAL;
5037 			break;
5038 		}
5039 
5040 		pkt_size = gaudi_packet_sizes[pkt_id];
5041 		cb_parsed_length += pkt_size;
5042 		if (cb_parsed_length > parser->user_cb_size) {
5043 			dev_err(hdev->dev,
5044 				"packet 0x%x is out of CB boundary\n", pkt_id);
5045 			rc = -EINVAL;
5046 			break;
5047 		}
5048 
5049 		switch (pkt_id) {
5050 		case PACKET_MSG_PROT:
5051 			dev_err(hdev->dev,
5052 				"User not allowed to use MSG_PROT\n");
5053 			rc = -EPERM;
5054 			break;
5055 
5056 		case PACKET_CP_DMA:
5057 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5058 			rc = -EPERM;
5059 			break;
5060 
5061 		case PACKET_STOP:
5062 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5063 			rc = -EPERM;
5064 			break;
5065 
5066 		case PACKET_WREG_BULK:
5067 			dev_err(hdev->dev,
5068 				"User not allowed to use WREG_BULK\n");
5069 			rc = -EPERM;
5070 			break;
5071 
5072 		case PACKET_LOAD_AND_EXE:
5073 			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5074 				(struct packet_load_and_exe *) user_pkt);
5075 			break;
5076 
5077 		case PACKET_LIN_DMA:
5078 			parser->contains_dma_pkt = true;
5079 			if (is_mmu)
5080 				parser->patched_cb_size += pkt_size;
5081 			else
5082 				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5083 					(struct packet_lin_dma *) user_pkt);
5084 			break;
5085 
5086 		case PACKET_WREG_32:
5087 		case PACKET_MSG_LONG:
5088 		case PACKET_MSG_SHORT:
5089 		case PACKET_REPEAT:
5090 		case PACKET_FENCE:
5091 		case PACKET_NOP:
5092 		case PACKET_ARB_POINT:
5093 			parser->patched_cb_size += pkt_size;
5094 			break;
5095 
5096 		default:
5097 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5098 				pkt_id);
5099 			rc = -EINVAL;
5100 			break;
5101 		}
5102 
5103 		if (rc)
5104 			break;
5105 	}
5106 
5107 	/*
5108 	 * The new CB should have space at the end for two MSG_PROT packets:
5109 	 * 1. Optional NOP padding for cacheline alignment
5110 	 * 2. A packet that will act as a completion packet
5111 	 * 3. A packet that will generate MSI interrupt
5112 	 */
5113 	if (parser->completion)
5114 		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5115 			parser->patched_cb_size);
5116 
5117 	return rc;
5118 }
5119 
5120 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5121 				struct hl_cs_parser *parser,
5122 				struct packet_lin_dma *user_dma_pkt,
5123 				struct packet_lin_dma *new_dma_pkt,
5124 				u32 *new_dma_pkt_size)
5125 {
5126 	struct hl_userptr *userptr;
5127 	struct scatterlist *sg, *sg_next_iter;
5128 	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5129 	u64 len, len_next;
5130 	dma_addr_t dma_addr, dma_addr_next;
5131 	u64 device_memory_addr, addr;
5132 	enum dma_data_direction dir;
5133 	struct sg_table *sgt;
5134 	bool src_in_host = false;
5135 	bool skip_host_mem_pin = false;
5136 	bool user_memset;
5137 
5138 	ctl = le32_to_cpu(user_dma_pkt->ctl);
5139 
5140 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5141 		src_in_host = true;
5142 
5143 	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5144 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5145 
5146 	if (src_in_host) {
5147 		addr = le64_to_cpu(user_dma_pkt->src_addr);
5148 		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5149 		dir = DMA_TO_DEVICE;
5150 		if (user_memset)
5151 			skip_host_mem_pin = true;
5152 	} else {
5153 		addr = le64_to_cpu(user_dma_pkt->dst_addr);
5154 		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5155 		dir = DMA_FROM_DEVICE;
5156 	}
5157 
5158 	if ((!skip_host_mem_pin) &&
5159 		(!hl_userptr_is_pinned(hdev, addr,
5160 					le32_to_cpu(user_dma_pkt->tsize),
5161 					parser->job_userptr_list, &userptr))) {
5162 		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5163 				addr, user_dma_pkt->tsize);
5164 		return -EFAULT;
5165 	}
5166 
5167 	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5168 		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5169 		*new_dma_pkt_size = sizeof(*user_dma_pkt);
5170 		return 0;
5171 	}
5172 
5173 	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5174 
5175 	sgt = userptr->sgt;
5176 	dma_desc_cnt = 0;
5177 
5178 	for_each_sgtable_dma_sg(sgt, sg, count) {
5179 		len = sg_dma_len(sg);
5180 		dma_addr = sg_dma_address(sg);
5181 
5182 		if (len == 0)
5183 			break;
5184 
5185 		while ((count + 1) < sgt->nents) {
5186 			sg_next_iter = sg_next(sg);
5187 			len_next = sg_dma_len(sg_next_iter);
5188 			dma_addr_next = sg_dma_address(sg_next_iter);
5189 
5190 			if (len_next == 0)
5191 				break;
5192 
5193 			if ((dma_addr + len == dma_addr_next) &&
5194 				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5195 				len += len_next;
5196 				count++;
5197 				sg = sg_next_iter;
5198 			} else {
5199 				break;
5200 			}
5201 		}
5202 
5203 		ctl = le32_to_cpu(user_dma_pkt->ctl);
5204 		if (likely(dma_desc_cnt))
5205 			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5206 		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5207 		new_dma_pkt->ctl = cpu_to_le32(ctl);
5208 		new_dma_pkt->tsize = cpu_to_le32(len);
5209 
5210 		if (dir == DMA_TO_DEVICE) {
5211 			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5212 			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5213 		} else {
5214 			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5215 			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5216 		}
5217 
5218 		if (!user_memset)
5219 			device_memory_addr += len;
5220 		dma_desc_cnt++;
5221 		new_dma_pkt++;
5222 	}
5223 
5224 	if (!dma_desc_cnt) {
5225 		dev_err(hdev->dev,
5226 			"Error of 0 SG entries when patching DMA packet\n");
5227 		return -EFAULT;
5228 	}
5229 
5230 	/* Fix the last dma packet - wrcomp must be as user set it */
5231 	new_dma_pkt--;
5232 	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5233 
5234 	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5235 
5236 	return 0;
5237 }
5238 
5239 static int gaudi_patch_cb(struct hl_device *hdev,
5240 				struct hl_cs_parser *parser)
5241 {
5242 	u32 cb_parsed_length = 0;
5243 	u32 cb_patched_cur_length = 0;
5244 	int rc = 0;
5245 
5246 	/* cb_user_size is more than 0 so loop will always be executed */
5247 	while (cb_parsed_length < parser->user_cb_size) {
5248 		enum packet_id pkt_id;
5249 		u16 pkt_size;
5250 		u32 new_pkt_size = 0;
5251 		struct gaudi_packet *user_pkt, *kernel_pkt;
5252 
5253 		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5254 		kernel_pkt = parser->patched_cb->kernel_address +
5255 					cb_patched_cur_length;
5256 
5257 		pkt_id = (enum packet_id) (
5258 				(le64_to_cpu(user_pkt->header) &
5259 				PACKET_HEADER_PACKET_ID_MASK) >>
5260 					PACKET_HEADER_PACKET_ID_SHIFT);
5261 
5262 		if (!validate_packet_id(pkt_id)) {
5263 			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5264 			rc = -EINVAL;
5265 			break;
5266 		}
5267 
5268 		pkt_size = gaudi_packet_sizes[pkt_id];
5269 		cb_parsed_length += pkt_size;
5270 		if (cb_parsed_length > parser->user_cb_size) {
5271 			dev_err(hdev->dev,
5272 				"packet 0x%x is out of CB boundary\n", pkt_id);
5273 			rc = -EINVAL;
5274 			break;
5275 		}
5276 
5277 		switch (pkt_id) {
5278 		case PACKET_LIN_DMA:
5279 			rc = gaudi_patch_dma_packet(hdev, parser,
5280 					(struct packet_lin_dma *) user_pkt,
5281 					(struct packet_lin_dma *) kernel_pkt,
5282 					&new_pkt_size);
5283 			cb_patched_cur_length += new_pkt_size;
5284 			break;
5285 
5286 		case PACKET_MSG_PROT:
5287 			dev_err(hdev->dev,
5288 				"User not allowed to use MSG_PROT\n");
5289 			rc = -EPERM;
5290 			break;
5291 
5292 		case PACKET_CP_DMA:
5293 			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5294 			rc = -EPERM;
5295 			break;
5296 
5297 		case PACKET_STOP:
5298 			dev_err(hdev->dev, "User not allowed to use STOP\n");
5299 			rc = -EPERM;
5300 			break;
5301 
5302 		case PACKET_WREG_32:
5303 		case PACKET_WREG_BULK:
5304 		case PACKET_MSG_LONG:
5305 		case PACKET_MSG_SHORT:
5306 		case PACKET_REPEAT:
5307 		case PACKET_FENCE:
5308 		case PACKET_NOP:
5309 		case PACKET_ARB_POINT:
5310 		case PACKET_LOAD_AND_EXE:
5311 			memcpy(kernel_pkt, user_pkt, pkt_size);
5312 			cb_patched_cur_length += pkt_size;
5313 			break;
5314 
5315 		default:
5316 			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5317 				pkt_id);
5318 			rc = -EINVAL;
5319 			break;
5320 		}
5321 
5322 		if (rc)
5323 			break;
5324 	}
5325 
5326 	return rc;
5327 }
5328 
5329 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5330 		struct hl_cs_parser *parser)
5331 {
5332 	u64 handle;
5333 	u32 patched_cb_size;
5334 	struct hl_cb *user_cb;
5335 	int rc;
5336 
5337 	/*
5338 	 * The new CB should have space at the end for two MSG_PROT packets:
5339 	 * 1. Optional NOP padding for cacheline alignment
5340 	 * 2. A packet that will act as a completion packet
5341 	 * 3. A packet that will generate MSI interrupt
5342 	 */
5343 	if (parser->completion)
5344 		parser->patched_cb_size = parser->user_cb_size +
5345 				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5346 	else
5347 		parser->patched_cb_size = parser->user_cb_size;
5348 
5349 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5350 				parser->patched_cb_size, false, false,
5351 				&handle);
5352 
5353 	if (rc) {
5354 		dev_err(hdev->dev,
5355 			"Failed to allocate patched CB for DMA CS %d\n",
5356 			rc);
5357 		return rc;
5358 	}
5359 
5360 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5361 	/* hl_cb_get should never fail */
5362 	if (!parser->patched_cb) {
5363 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5364 		rc = -EFAULT;
5365 		goto out;
5366 	}
5367 
5368 	/*
5369 	 * We are protected from overflow because the check
5370 	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5371 	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5372 	 *
5373 	 * There is no option to reach here without going through that check because:
5374 	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5375 	 *    an external queue.
5376 	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5377 	 */
5378 	memcpy(parser->patched_cb->kernel_address,
5379 		parser->user_cb->kernel_address,
5380 		parser->user_cb_size);
5381 
5382 	patched_cb_size = parser->patched_cb_size;
5383 
5384 	/* Validate patched CB instead of user CB */
5385 	user_cb = parser->user_cb;
5386 	parser->user_cb = parser->patched_cb;
5387 	rc = gaudi_validate_cb(hdev, parser, true);
5388 	parser->user_cb = user_cb;
5389 
5390 	if (rc) {
5391 		hl_cb_put(parser->patched_cb);
5392 		goto out;
5393 	}
5394 
5395 	if (patched_cb_size != parser->patched_cb_size) {
5396 		dev_err(hdev->dev, "user CB size mismatch\n");
5397 		hl_cb_put(parser->patched_cb);
5398 		rc = -EINVAL;
5399 		goto out;
5400 	}
5401 
5402 out:
5403 	/*
5404 	 * Always call cb destroy here because we still have 1 reference
5405 	 * to it by calling cb_get earlier. After the job will be completed,
5406 	 * cb_put will release it, but here we want to remove it from the
5407 	 * idr
5408 	 */
5409 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5410 
5411 	return rc;
5412 }
5413 
5414 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5415 		struct hl_cs_parser *parser)
5416 {
5417 	u64 handle;
5418 	int rc;
5419 
5420 	rc = gaudi_validate_cb(hdev, parser, false);
5421 
5422 	if (rc)
5423 		goto free_userptr;
5424 
5425 	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5426 				parser->patched_cb_size, false, false,
5427 				&handle);
5428 	if (rc) {
5429 		dev_err(hdev->dev,
5430 			"Failed to allocate patched CB for DMA CS %d\n", rc);
5431 		goto free_userptr;
5432 	}
5433 
5434 	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5435 	/* hl_cb_get should never fail here */
5436 	if (!parser->patched_cb) {
5437 		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5438 		rc = -EFAULT;
5439 		goto out;
5440 	}
5441 
5442 	rc = gaudi_patch_cb(hdev, parser);
5443 
5444 	if (rc)
5445 		hl_cb_put(parser->patched_cb);
5446 
5447 out:
5448 	/*
5449 	 * Always call cb destroy here because we still have 1 reference
5450 	 * to it by calling cb_get earlier. After the job will be completed,
5451 	 * cb_put will release it, but here we want to remove it from the
5452 	 * idr
5453 	 */
5454 	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5455 
5456 free_userptr:
5457 	if (rc)
5458 		hl_userptr_delete_list(hdev, parser->job_userptr_list);
5459 	return rc;
5460 }
5461 
5462 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5463 					struct hl_cs_parser *parser)
5464 {
5465 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5466 	struct gaudi_device *gaudi = hdev->asic_specific;
5467 	u32 nic_queue_offset, nic_mask_q_id;
5468 
5469 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5470 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5471 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5472 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5473 
5474 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5475 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5476 			return -EINVAL;
5477 		}
5478 	}
5479 
5480 	/* For internal queue jobs just check if CB address is valid */
5481 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5482 					parser->user_cb_size,
5483 					asic_prop->sram_user_base_address,
5484 					asic_prop->sram_end_address))
5485 		return 0;
5486 
5487 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5488 					parser->user_cb_size,
5489 					asic_prop->dram_user_base_address,
5490 					asic_prop->dram_end_address))
5491 		return 0;
5492 
5493 	/* PMMU and HPMMU addresses are equal, check only one of them */
5494 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5495 					parser->user_cb_size,
5496 					asic_prop->pmmu.start_addr,
5497 					asic_prop->pmmu.end_addr))
5498 		return 0;
5499 
5500 	dev_err(hdev->dev,
5501 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5502 		parser->user_cb, parser->user_cb_size);
5503 
5504 	return -EFAULT;
5505 }
5506 
5507 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5508 {
5509 	struct gaudi_device *gaudi = hdev->asic_specific;
5510 
5511 	if (parser->queue_type == QUEUE_TYPE_INT)
5512 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5513 
5514 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5515 		return gaudi_parse_cb_mmu(hdev, parser);
5516 	else
5517 		return gaudi_parse_cb_no_mmu(hdev, parser);
5518 }
5519 
5520 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5521 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5522 				u32 msi_vec, bool eb)
5523 {
5524 	struct packet_msg_prot *cq_pkt;
5525 	struct packet_nop *cq_padding;
5526 	u64 msi_addr;
5527 	u32 tmp;
5528 
5529 	cq_padding = kernel_address + original_len;
5530 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5531 
5532 	while ((void *)cq_padding < (void *)cq_pkt) {
5533 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5534 		cq_padding++;
5535 	}
5536 
5537 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5538 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5539 
5540 	if (eb)
5541 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5542 
5543 	cq_pkt->ctl = cpu_to_le32(tmp);
5544 	cq_pkt->value = cpu_to_le32(cq_val);
5545 	cq_pkt->addr = cpu_to_le64(cq_addr);
5546 
5547 	cq_pkt++;
5548 
5549 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5550 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5551 	cq_pkt->ctl = cpu_to_le32(tmp);
5552 	cq_pkt->value = cpu_to_le32(1);
5553 	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5554 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5555 }
5556 
5557 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5558 {
5559 	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5560 }
5561 
5562 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5563 					u32 size, u64 val)
5564 {
5565 	struct packet_lin_dma *lin_dma_pkt;
5566 	struct hl_cs_job *job;
5567 	u32 cb_size, ctl, err_cause;
5568 	struct hl_cb *cb;
5569 	int rc;
5570 
5571 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5572 	if (!cb)
5573 		return -EFAULT;
5574 
5575 	lin_dma_pkt = cb->kernel_address;
5576 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5577 	cb_size = sizeof(*lin_dma_pkt);
5578 
5579 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5580 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5581 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5582 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5583 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5584 
5585 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5586 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5587 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5588 	lin_dma_pkt->tsize = cpu_to_le32(size);
5589 
5590 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5591 	if (!job) {
5592 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5593 		rc = -ENOMEM;
5594 		goto release_cb;
5595 	}
5596 
5597 	/* Verify DMA is OK */
5598 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5599 	if (err_cause && !hdev->init_done) {
5600 		dev_dbg(hdev->dev,
5601 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5602 			err_cause);
5603 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5604 	}
5605 
5606 	job->id = 0;
5607 	job->user_cb = cb;
5608 	atomic_inc(&job->user_cb->cs_cnt);
5609 	job->user_cb_size = cb_size;
5610 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5611 	job->patched_cb = job->user_cb;
5612 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5613 
5614 	hl_debugfs_add_job(hdev, job);
5615 
5616 	rc = gaudi_send_job_on_qman0(hdev, job);
5617 	hl_debugfs_remove_job(hdev, job);
5618 	kfree(job);
5619 	atomic_dec(&cb->cs_cnt);
5620 
5621 	/* Verify DMA is OK */
5622 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5623 	if (err_cause) {
5624 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5625 		rc = -EIO;
5626 		if (!hdev->init_done) {
5627 			dev_dbg(hdev->dev,
5628 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5629 				err_cause);
5630 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5631 		}
5632 	}
5633 
5634 release_cb:
5635 	hl_cb_put(cb);
5636 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5637 
5638 	return rc;
5639 }
5640 
5641 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5642 					u32 num_regs, u32 val)
5643 {
5644 	struct packet_msg_long *pkt;
5645 	struct hl_cs_job *job;
5646 	u32 cb_size, ctl;
5647 	struct hl_cb *cb;
5648 	int i, rc;
5649 
5650 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5651 
5652 	if (cb_size > SZ_2M) {
5653 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5654 		return -ENOMEM;
5655 	}
5656 
5657 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5658 	if (!cb)
5659 		return -EFAULT;
5660 
5661 	pkt = cb->kernel_address;
5662 
5663 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5664 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5665 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5666 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5667 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5668 
5669 	for (i = 0; i < num_regs ; i++, pkt++) {
5670 		pkt->ctl = cpu_to_le32(ctl);
5671 		pkt->value = cpu_to_le32(val);
5672 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5673 	}
5674 
5675 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5676 	if (!job) {
5677 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5678 		rc = -ENOMEM;
5679 		goto release_cb;
5680 	}
5681 
5682 	job->id = 0;
5683 	job->user_cb = cb;
5684 	atomic_inc(&job->user_cb->cs_cnt);
5685 	job->user_cb_size = cb_size;
5686 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5687 	job->patched_cb = job->user_cb;
5688 	job->job_cb_size = cb_size;
5689 
5690 	hl_debugfs_add_job(hdev, job);
5691 
5692 	rc = gaudi_send_job_on_qman0(hdev, job);
5693 	hl_debugfs_remove_job(hdev, job);
5694 	kfree(job);
5695 	atomic_dec(&cb->cs_cnt);
5696 
5697 release_cb:
5698 	hl_cb_put(cb);
5699 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5700 
5701 	return rc;
5702 }
5703 
5704 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5705 {
5706 	u64 base_addr;
5707 	u32 num_regs;
5708 	int rc;
5709 
5710 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5711 	num_regs = NUM_OF_SOB_IN_BLOCK;
5712 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5713 	if (rc) {
5714 		dev_err(hdev->dev, "failed resetting SM registers");
5715 		return -ENOMEM;
5716 	}
5717 
5718 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5719 	num_regs = NUM_OF_SOB_IN_BLOCK;
5720 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5721 	if (rc) {
5722 		dev_err(hdev->dev, "failed resetting SM registers");
5723 		return -ENOMEM;
5724 	}
5725 
5726 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5727 	num_regs = NUM_OF_SOB_IN_BLOCK;
5728 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5729 	if (rc) {
5730 		dev_err(hdev->dev, "failed resetting SM registers");
5731 		return -ENOMEM;
5732 	}
5733 
5734 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5735 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5736 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5737 	if (rc) {
5738 		dev_err(hdev->dev, "failed resetting SM registers");
5739 		return -ENOMEM;
5740 	}
5741 
5742 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5743 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5744 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5745 	if (rc) {
5746 		dev_err(hdev->dev, "failed resetting SM registers");
5747 		return -ENOMEM;
5748 	}
5749 
5750 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5751 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5752 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5753 	if (rc) {
5754 		dev_err(hdev->dev, "failed resetting SM registers");
5755 		return -ENOMEM;
5756 	}
5757 
5758 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5759 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5760 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5761 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5762 	if (rc) {
5763 		dev_err(hdev->dev, "failed resetting SM registers");
5764 		return -ENOMEM;
5765 	}
5766 
5767 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5768 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5769 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5770 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5771 	if (rc) {
5772 		dev_err(hdev->dev, "failed resetting SM registers");
5773 		return -ENOMEM;
5774 	}
5775 
5776 	return 0;
5777 }
5778 
5779 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5780 {
5781 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5782 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5783 	int i;
5784 
5785 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5786 		u64 sob_addr = CFG_BASE +
5787 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5788 				(i * sob_delta);
5789 		u32 dma_offset = i * DMA_CORE_OFFSET;
5790 
5791 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5792 				lower_32_bits(sob_addr));
5793 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5794 				upper_32_bits(sob_addr));
5795 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5796 
5797 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5798 		 * modified by the user for SRAM reduction
5799 		 */
5800 		if (i > 1)
5801 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5802 								0x00000001);
5803 	}
5804 }
5805 
5806 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5807 {
5808 	u32 qman_offset;
5809 	int i;
5810 
5811 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5812 		qman_offset = i * DMA_QMAN_OFFSET;
5813 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5814 	}
5815 
5816 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5817 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5818 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5819 	}
5820 
5821 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5822 		qman_offset = i * TPC_QMAN_OFFSET;
5823 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5824 	}
5825 
5826 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5827 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5828 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5829 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5830 	}
5831 }
5832 
/*
 * gaudi_restore_user_registers() - restore the user-modifiable registers
 * @hdev: pointer to the habanalabs device structure
 *
 * Restores the sync-manager registers first. The DMA and QM registers are
 * restored only if that step succeeded.
 *
 * Return: 0 on success, otherwise the error from gaudi_restore_sm_registers().
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
5846 
5847 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5848 {
5849 	return 0;
5850 }
5851 
5852 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5853 {
5854 	u32 size = hdev->asic_prop.mmu_pgt_size +
5855 			hdev->asic_prop.mmu_cache_mng_size;
5856 	struct gaudi_device *gaudi = hdev->asic_specific;
5857 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5858 
5859 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5860 		return 0;
5861 
5862 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5863 }
5864 
/*
 * gaudi_restore_phase_topology() - phase-topology restore hook
 * @hdev: pointer to the habanalabs device structure
 *
 * Intentionally empty.
 */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{
}
5869 
5870 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5871 					u32 size_to_dma, dma_addr_t dma_addr)
5872 {
5873 	u32 err_cause, val;
5874 	u64 dma_offset;
5875 	int rc;
5876 
5877 	dma_offset = dma_id * DMA_CORE_OFFSET;
5878 
5879 	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5880 	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5881 	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5882 	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5883 	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5884 	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5885 			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5886 
5887 	rc = hl_poll_timeout(
5888 		hdev,
5889 		mmDMA0_CORE_STS0 + dma_offset,
5890 		val,
5891 		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5892 		0,
5893 		1000000);
5894 
5895 	if (rc) {
5896 		dev_err(hdev->dev,
5897 			"DMA %d timed-out during reading of 0x%llx\n",
5898 			dma_id, addr);
5899 		return -EIO;
5900 	}
5901 
5902 	/* Verify DMA is OK */
5903 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5904 	if (err_cause) {
5905 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5906 		dev_dbg(hdev->dev,
5907 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5908 			err_cause);
5909 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5910 
5911 		return -EIO;
5912 	}
5913 
5914 	return 0;
5915 }
5916 
5917 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5918 				void *blob_addr)
5919 {
5920 	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5921 	u32 qm_glbl_sts0, qm_cgm_sts;
5922 	u64 dma_offset, qm_offset;
5923 	dma_addr_t dma_addr;
5924 	void *kernel_addr;
5925 	bool is_eng_idle;
5926 	int rc = 0, dma_id;
5927 
5928 	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5929 
5930 	if (!kernel_addr)
5931 		return -ENOMEM;
5932 
5933 	hdev->asic_funcs->hw_queues_lock(hdev);
5934 
5935 	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5936 	dma_offset = dma_id * DMA_CORE_OFFSET;
5937 	qm_offset = dma_id * DMA_QMAN_OFFSET;
5938 	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5939 	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5940 	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5941 	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5942 		      IS_DMA_IDLE(dma_core_sts0);
5943 
5944 	if (!is_eng_idle) {
5945 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5946 		dma_offset = dma_id * DMA_CORE_OFFSET;
5947 		qm_offset = dma_id * DMA_QMAN_OFFSET;
5948 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5949 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5950 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5951 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5952 			      IS_DMA_IDLE(dma_core_sts0);
5953 
5954 		if (!is_eng_idle) {
5955 			dev_err_ratelimited(hdev->dev,
5956 				"Can't read via DMA because it is BUSY\n");
5957 			rc = -EAGAIN;
5958 			goto out;
5959 		}
5960 	}
5961 
5962 	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5963 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5964 			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5965 
5966 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
5967 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5968 	 * ASID
5969 	 */
5970 	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5971 
5972 	/* Verify DMA is OK */
5973 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5974 	if (err_cause) {
5975 		dev_dbg(hdev->dev,
5976 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5977 			err_cause);
5978 		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5979 	}
5980 
5981 	pos = 0;
5982 	size_left = size;
5983 	size_to_dma = SZ_2M;
5984 
5985 	while (size_left > 0) {
5986 
5987 		if (size_left < SZ_2M)
5988 			size_to_dma = size_left;
5989 
5990 		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5991 						dma_addr);
5992 		if (rc)
5993 			break;
5994 
5995 		memcpy(blob_addr + pos, kernel_addr, size_to_dma);
5996 
5997 		if (size_left <= SZ_2M)
5998 			break;
5999 
6000 		pos += SZ_2M;
6001 		addr += SZ_2M;
6002 		size_left -= SZ_2M;
6003 	}
6004 
6005 	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
6006 	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6007 	 * ASID
6008 	 */
6009 	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6010 			~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6011 
6012 	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6013 
6014 out:
6015 	hdev->asic_funcs->hw_queues_unlock(hdev);
6016 
6017 	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6018 
6019 	return rc;
6020 }
6021 
6022 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6023 {
6024 	struct gaudi_device *gaudi = hdev->asic_specific;
6025 
6026 	if (hdev->reset_info.hard_reset_pending)
6027 		return U64_MAX;
6028 
6029 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6030 			(addr - gaudi->hbm_bar_cur_addr));
6031 }
6032 
6033 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6034 {
6035 	struct gaudi_device *gaudi = hdev->asic_specific;
6036 
6037 	if (hdev->reset_info.hard_reset_pending)
6038 		return;
6039 
6040 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6041 			(addr - gaudi->hbm_bar_cur_addr));
6042 }
6043 
6044 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6045 {
6046 	/* mask to zero the MMBP and ASID bits */
6047 	WREG32_AND(reg, ~0x7FF);
6048 	WREG32_OR(reg, asid);
6049 }
6050 
6051 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6052 {
6053 	struct gaudi_device *gaudi = hdev->asic_specific;
6054 
6055 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6056 		return;
6057 
6058 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6059 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6060 		return;
6061 	}
6062 
6063 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6064 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6065 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6066 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6067 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6068 
6069 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6070 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6071 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6072 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6073 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6074 
6075 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6076 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6077 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6078 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6079 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6080 
6081 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6082 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6083 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6084 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6085 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6086 
6087 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6088 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6089 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6090 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6091 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6092 
6093 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6094 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6095 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6096 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6097 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6098 
6099 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6100 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6101 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6102 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6103 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6104 
6105 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6106 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6107 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6108 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6109 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6110 
6111 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6112 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6113 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6114 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6115 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6116 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6117 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6118 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6119 
6120 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6121 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6122 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6123 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6124 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6125 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6126 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6127 
6128 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6129 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6130 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6131 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6132 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6133 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6134 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6135 
6136 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6137 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6138 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6139 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6140 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6141 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6142 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6143 
6144 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6145 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6146 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6147 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6149 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6151 
6152 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6153 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6154 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6159 
6160 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6161 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6162 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6167 
6168 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6175 
6176 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6177 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6178 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6183 
6184 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6185 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6186 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6191 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6192 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6193 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6194 
6195 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6197 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6198 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6201 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6202 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6206 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6207 
6208 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6209 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6210 				asid);
6211 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6212 				asid);
6213 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6214 				asid);
6215 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6216 				asid);
6217 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6218 				asid);
6219 	}
6220 
6221 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6222 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6223 				asid);
6224 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6225 				asid);
6226 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6227 				asid);
6228 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6229 				asid);
6230 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6231 				asid);
6232 	}
6233 
6234 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6235 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6236 				asid);
6237 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6238 				asid);
6239 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6240 				asid);
6241 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6242 				asid);
6243 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6244 				asid);
6245 	}
6246 
6247 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6248 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6249 				asid);
6250 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6251 				asid);
6252 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6253 				asid);
6254 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6255 				asid);
6256 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6257 				asid);
6258 	}
6259 
6260 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6261 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6262 				asid);
6263 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6264 				asid);
6265 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6266 				asid);
6267 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6268 				asid);
6269 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6270 				asid);
6271 	}
6272 
6273 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6274 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6275 				asid);
6276 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6277 				asid);
6278 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6279 				asid);
6280 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6281 				asid);
6282 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6283 				asid);
6284 	}
6285 
6286 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6287 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6288 				asid);
6289 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6290 				asid);
6291 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6292 				asid);
6293 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6294 				asid);
6295 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6296 				asid);
6297 	}
6298 
6299 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6300 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6301 				asid);
6302 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6303 				asid);
6304 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6305 				asid);
6306 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6307 				asid);
6308 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6309 				asid);
6310 	}
6311 
6312 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6313 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6314 				asid);
6315 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6316 				asid);
6317 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6318 				asid);
6319 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6320 				asid);
6321 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6322 				asid);
6323 	}
6324 
6325 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6326 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6327 				asid);
6328 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6329 				asid);
6330 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6331 				asid);
6332 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6333 				asid);
6334 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6335 				asid);
6336 	}
6337 
6338 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6339 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6340 }
6341 
6342 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6343 		struct hl_cs_job *job)
6344 {
6345 	struct packet_msg_prot *fence_pkt;
6346 	u32 *fence_ptr;
6347 	dma_addr_t fence_dma_addr;
6348 	struct hl_cb *cb;
6349 	u32 tmp, timeout, dma_offset;
6350 	int rc;
6351 
6352 	if (hdev->pldm)
6353 		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6354 	else
6355 		timeout = HL_DEVICE_TIMEOUT_USEC;
6356 
6357 	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6358 	if (!fence_ptr) {
6359 		dev_err(hdev->dev,
6360 			"Failed to allocate fence memory for QMAN0\n");
6361 		return -ENOMEM;
6362 	}
6363 
6364 	cb = job->patched_cb;
6365 
6366 	fence_pkt = cb->kernel_address +
6367 			job->job_cb_size - sizeof(struct packet_msg_prot);
6368 
6369 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6370 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6371 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6372 
6373 	fence_pkt->ctl = cpu_to_le32(tmp);
6374 	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6375 	fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6376 
6377 	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6378 
6379 	WREG32(mmDMA0_CORE_PROT + dma_offset,
6380 			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6381 
6382 	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6383 					job->job_cb_size, cb->bus_address);
6384 	if (rc) {
6385 		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6386 		goto free_fence_ptr;
6387 	}
6388 
6389 	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6390 				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6391 				timeout, true);
6392 
6393 	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6394 
6395 	if (rc == -ETIMEDOUT) {
6396 		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6397 		goto free_fence_ptr;
6398 	}
6399 
6400 free_fence_ptr:
6401 	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6402 
6403 	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6404 	return rc;
6405 }
6406 
6407 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6408 {
6409 	if (event_type >= GAUDI_EVENT_SIZE)
6410 		goto event_not_supported;
6411 
6412 	if (!gaudi_irq_map_table[event_type].valid)
6413 		goto event_not_supported;
6414 
6415 	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6416 
6417 	return;
6418 
6419 event_not_supported:
6420 	snprintf(desc, size, "N/A");
6421 }
6422 
6423 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6424 							bool is_write, u16 *engine_id_1,
6425 							u16 *engine_id_2)
6426 {
6427 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6428 
6429 	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6430 				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6431 
6432 	switch (x_y) {
6433 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6434 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6435 		dma_id[0] = 0;
6436 		dma_id[1] = 2;
6437 		break;
6438 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6439 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6440 		dma_id[0] = 1;
6441 		dma_id[1] = 3;
6442 		break;
6443 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6444 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6445 		dma_id[0] = 4;
6446 		dma_id[1] = 6;
6447 		break;
6448 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6449 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6450 		dma_id[0] = 5;
6451 		dma_id[1] = 7;
6452 		break;
6453 	default:
6454 		goto unknown_initiator;
6455 	}
6456 
6457 	for (i = 0 ; i < 2 ; i++) {
6458 		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6459 		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6460 	}
6461 
6462 	switch (x_y) {
6463 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6464 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6465 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6466 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6467 			return "DMA0";
6468 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6469 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6470 			return "DMA2";
6471 		} else {
6472 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6473 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6474 			return "DMA0 or DMA2";
6475 		}
6476 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6477 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6478 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6479 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6480 			return "DMA1";
6481 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6482 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6483 			return "DMA3";
6484 		} else {
6485 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6486 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6487 			return "DMA1 or DMA3";
6488 		}
6489 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6490 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6491 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6492 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6493 			return "DMA4";
6494 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6495 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6496 			return "DMA6";
6497 		} else {
6498 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6499 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6500 			return "DMA4 or DMA6";
6501 		}
6502 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6503 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6504 		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6505 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6506 			return "DMA5";
6507 		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6508 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6509 			return "DMA7";
6510 		} else {
6511 			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6512 			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6513 			return "DMA5 or DMA7";
6514 		}
6515 	}
6516 
6517 unknown_initiator:
6518 	return "unknown initiator";
6519 }
6520 
6521 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6522 							u16 *engine_id_1, u16 *engine_id_2)
6523 {
6524 	u32 val, x_y, axi_id;
6525 
6526 	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6527 				RREG32(mmMMU_UP_RAZWI_READ_ID);
6528 	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6529 			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6530 	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6531 			RAZWI_INITIATOR_AXI_ID_SHIFT);
6532 
6533 	switch (x_y) {
6534 	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6535 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6536 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6537 			return "TPC0";
6538 		}
6539 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6540 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6541 			return "NIC0";
6542 		}
6543 		break;
6544 	case RAZWI_INITIATOR_ID_X_Y_TPC1:
6545 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6546 		return "TPC1";
6547 	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6548 	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6549 		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6550 		return "MME0";
6551 	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6552 	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6553 		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6554 		return "MME1";
6555 	case RAZWI_INITIATOR_ID_X_Y_TPC2:
6556 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6557 		return "TPC2";
6558 	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6559 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6560 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6561 			return "TPC3";
6562 		}
6563 		/* PCI, CPU or PSOC does not have engine id*/
6564 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6565 			return "PCI";
6566 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6567 			return "CPU";
6568 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6569 			return "PSOC";
6570 		break;
6571 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6572 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6573 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6574 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6575 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6576 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6577 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6578 	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6579 		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6580 				engine_id_1, engine_id_2);
6581 	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6582 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6583 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6584 			return "TPC4";
6585 		}
6586 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6587 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6588 			return "NIC1";
6589 		}
6590 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6591 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6592 			return "NIC2";
6593 		}
6594 		break;
6595 	case RAZWI_INITIATOR_ID_X_Y_TPC5:
6596 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6597 		return "TPC5";
6598 	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6599 	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6600 		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6601 		return "MME2";
6602 	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6603 	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6604 		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6605 		return "MME3";
6606 	case RAZWI_INITIATOR_ID_X_Y_TPC6:
6607 		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6608 		return "TPC6";
6609 	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6610 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6611 			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6612 			return "TPC7";
6613 		}
6614 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6615 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6616 			return "NIC4";
6617 		}
6618 		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6619 			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6620 			return "NIC5";
6621 		}
6622 		break;
6623 	default:
6624 		break;
6625 	}
6626 
6627 	dev_err(hdev->dev,
6628 		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6629 		val,
6630 		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6631 		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6632 		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6633 			RAZWI_INITIATOR_AXI_ID_MASK);
6634 
6635 	return "unknown initiator";
6636 }
6637 
6638 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6639 						u16 *engine_id_2, bool *is_read, bool *is_write)
6640 {
6641 
6642 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6643 		dev_err_ratelimited(hdev->dev,
6644 			"RAZWI event caused by illegal write of %s\n",
6645 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6646 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6647 		*is_write = true;
6648 	}
6649 
6650 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6651 		dev_err_ratelimited(hdev->dev,
6652 			"RAZWI event caused by illegal read of %s\n",
6653 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6654 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6655 		*is_read = true;
6656 	}
6657 }
6658 
6659 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6660 {
6661 	struct gaudi_device *gaudi = hdev->asic_specific;
6662 	u32 val;
6663 
6664 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6665 		return;
6666 
6667 	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6668 	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6669 		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6670 		*addr <<= 32;
6671 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6672 
6673 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6674 		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6675 
6676 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6677 	}
6678 
6679 	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6680 	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6681 		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6682 		*addr <<= 32;
6683 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6684 
6685 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6686 
6687 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6688 	}
6689 }
6690 
6691 /*
6692  *  +-------------------+------------------------------------------------------+
6693  *  | Configuration Reg |                     Description                      |
6694  *  |      Address      |                                                      |
6695  *  +-------------------+------------------------------------------------------+
6696  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6697  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6698  *  |                   |0xF34 memory wrappers 63:32                           |
6699  *  |                   |0xF38 memory wrappers 95:64                           |
6700  *  |                   |0xF3C memory wrappers 127:96                          |
6701  *  +-------------------+------------------------------------------------------+
6702  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6703  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6704  *  |                   |0xF44 memory wrappers 63:32                           |
6705  *  |                   |0xF48 memory wrappers 95:64                           |
6706  *  |                   |0xF4C memory wrappers 127:96                          |
6707  *  +-------------------+------------------------------------------------------+
6708  */
6709 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6710 		struct ecc_info_extract_params *params, u64 *ecc_address,
6711 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6712 {
6713 	u32 i, num_mem_regs, reg, err_bit;
6714 	u64 err_addr, err_word = 0;
6715 
6716 	num_mem_regs = params->num_memories / 32 +
6717 			((params->num_memories % 32) ? 1 : 0);
6718 
6719 	if (params->block_address >= CFG_BASE)
6720 		params->block_address -= CFG_BASE;
6721 
6722 	if (params->derr)
6723 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6724 	else
6725 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6726 
6727 	/* Set invalid wrapper index */
6728 	*memory_wrapper_idx = 0xFF;
6729 
6730 	/* Iterate through memory wrappers, a single bit must be set */
6731 	for (i = 0 ; i < num_mem_regs ; i++) {
6732 		err_addr += i * 4;
6733 		err_word = RREG32(err_addr);
6734 		if (err_word) {
6735 			err_bit = __ffs(err_word);
6736 			*memory_wrapper_idx = err_bit + (32 * i);
6737 			break;
6738 		}
6739 	}
6740 
6741 	if (*memory_wrapper_idx == 0xFF) {
6742 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6743 		return -EINVAL;
6744 	}
6745 
6746 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6747 			*memory_wrapper_idx);
6748 
6749 	*ecc_address =
6750 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6751 	*ecc_syndrom =
6752 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6753 
6754 	/* Clear error indication */
6755 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6756 	if (params->derr)
6757 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6758 	else
6759 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6760 
6761 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6762 
6763 	return 0;
6764 }
6765 
6766 /*
6767  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6768  *
6769  * @idx: the current pi/ci value
6770  * @q_len: the queue length (power of 2)
6771  *
6772  * @return the cyclically decremented index
6773  */
6774 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6775 {
6776 	u32 mask = q_len - 1;
6777 
6778 	/*
6779 	 * modular decrement is equivalent to adding (queue_size -1)
6780 	 * later we take LSBs to make sure the value is in the
6781 	 * range [0, queue_len - 1]
6782 	 */
6783 	return (idx + q_len - 1) & mask;
6784 }
6785 
6786 /**
6787  * gaudi_handle_sw_config_stream_data - print SW config stream data
6788  *
6789  * @hdev: pointer to the habanalabs device structure
6790  * @stream: the QMAN's stream
6791  * @qman_base: base address of QMAN registers block
6792  * @event_mask: mask of the last events occurred
6793  */
6794 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6795 						u64 qman_base, u64 event_mask)
6796 {
6797 	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6798 	u32 cq_ptr_lo_off, size;
6799 
6800 	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6801 
6802 	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6803 						stream * cq_ptr_lo_off;
6804 	cq_ptr_hi = cq_ptr_lo +
6805 				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6806 	cq_tsize = cq_ptr_lo +
6807 				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6808 
6809 	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6810 	size = RREG32(cq_tsize);
6811 	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6812 							stream, cq_ptr, size);
6813 
6814 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6815 		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6816 		hdev->captured_err_info.undef_opcode.cq_size = size;
6817 		hdev->captured_err_info.undef_opcode.stream_id = stream;
6818 	}
6819 }
6820 
6821 /**
6822  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6823  *
6824  * @hdev: pointer to the habanalabs device structure
6825  * @qid_base: first QID of the QMAN (out of 4 streams)
6826  * @stream: the QMAN's stream
6827  * @qman_base: base address of QMAN registers block
6828  * @event_mask: mask of the last events occurred
6829  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6830  */
6831 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6832 						u32 stream, u64 qman_base,
6833 						u64 event_mask,
6834 						bool pr_sw_conf)
6835 {
6836 	u32 ci, qm_ci_stream_off, queue_len;
6837 	struct hl_hw_queue *q;
6838 	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6839 	int i;
6840 
6841 	q = &hdev->kernel_queues[qid_base + stream];
6842 
6843 	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6844 	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6845 						stream * qm_ci_stream_off;
6846 
6847 	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6848 					q->int_queue_len : HL_QUEUE_LENGTH;
6849 
6850 	hdev->asic_funcs->hw_queues_lock(hdev);
6851 
6852 	if (pr_sw_conf)
6853 		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6854 
6855 	ci = RREG32(pq_ci);
6856 
6857 	/* we should start printing form ci -1 */
6858 	ci = gaudi_queue_idx_dec(ci, queue_len);
6859 	memset(addr, 0, sizeof(addr));
6860 
6861 	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6862 		struct hl_bd *bd;
6863 		u32 len;
6864 
6865 		bd = q->kernel_address;
6866 		bd += ci;
6867 
6868 		len = le32_to_cpu(bd->len);
6869 		/* len 0 means uninitialized entry- break */
6870 		if (!len)
6871 			break;
6872 
6873 		addr[i] = le64_to_cpu(bd->ptr);
6874 
6875 		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6876 							stream, ci, addr[i], len);
6877 
6878 		/* get previous ci, wrap if needed */
6879 		ci = gaudi_queue_idx_dec(ci, queue_len);
6880 	}
6881 
6882 	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6883 		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6884 		u32 arr_idx = undef_opcode->cb_addr_streams_len;
6885 
6886 		if (arr_idx == 0) {
6887 			undef_opcode->timestamp = ktime_get();
6888 			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6889 		}
6890 
6891 		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6892 		undef_opcode->cb_addr_streams_len++;
6893 	}
6894 
6895 	hdev->asic_funcs->hw_queues_unlock(hdev);
6896 }
6897 
6898 /**
6899  * handle_qman_data_on_err - extract QMAN data on error
6900  *
6901  * @hdev: pointer to the habanalabs device structure
6902  * @qid_base: first QID of the QMAN (out of 4 streams)
6903  * @stream: the QMAN's stream
6904  * @qman_base: base address of QMAN registers block
6905  * @event_mask: mask of the last events occurred
6906  *
6907  * This function attempt to exatract as much data as possible on QMAN error.
6908  * On upper CP print the SW config stream data and last 8 PQEs.
6909  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6910  */
6911 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6912 				   u32 stream, u64 qman_base, u64 event_mask)
6913 {
6914 	u32 i;
6915 
6916 	if (stream != QMAN_STREAMS) {
6917 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6918 			qman_base, event_mask, true);
6919 		return;
6920 	}
6921 
6922 	/* handle Lower-CP */
6923 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6924 
6925 	for (i = 0; i < QMAN_STREAMS; i++)
6926 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6927 			qman_base, event_mask, false);
6928 }
6929 
6930 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6931 					  const char *qm_name,
6932 					  u64 qman_base,
6933 					  u32 qid_base,
6934 					  u64 *event_mask)
6935 {
6936 	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6937 	u64 glbl_sts_addr, arb_err_addr;
6938 	char reg_desc[32];
6939 
6940 	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6941 	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6942 
6943 	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
6944 	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6945 		glbl_sts_clr_val = 0;
6946 		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6947 
6948 		if (!glbl_sts_val)
6949 			continue;
6950 
6951 		if (i == QMAN_STREAMS)
6952 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6953 		else
6954 			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6955 
6956 		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6957 			if (glbl_sts_val & BIT(j)) {
6958 				dev_err_ratelimited(hdev->dev,
6959 						"%s %s. err cause: %s\n",
6960 						qm_name, reg_desc,
6961 						gaudi_qman_error_cause[j]);
6962 				glbl_sts_clr_val |= BIT(j);
6963 			}
6964 		}
6965 		/* check for undefined opcode */
6966 		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6967 				hdev->captured_err_info.undef_opcode.write_enable) {
6968 			memset(&hdev->captured_err_info.undef_opcode, 0,
6969 						sizeof(hdev->captured_err_info.undef_opcode));
6970 
6971 			hdev->captured_err_info.undef_opcode.write_enable = false;
6972 			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6973 		}
6974 
6975 		/* Write 1 clear errors */
6976 		if (!hdev->stop_on_err)
6977 			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6978 		else
6979 			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6980 	}
6981 
6982 	arb_err_val = RREG32(arb_err_addr);
6983 
6984 	if (!arb_err_val)
6985 		return;
6986 
6987 	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6988 		if (arb_err_val & BIT(j)) {
6989 			dev_err_ratelimited(hdev->dev,
6990 					"%s ARB_ERR. err cause: %s\n",
6991 					qm_name,
6992 					gaudi_qman_arb_error_cause[j]);
6993 		}
6994 	}
6995 }
6996 
6997 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
6998 		struct hl_eq_sm_sei_data *sei_data)
6999 {
7000 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7001 
7002 	/* Flip the bits as the enum is ordered in the opposite way */
7003 	index = (index ^ 0x3) & 0x3;
7004 
7005 	switch (sei_data->sei_cause) {
7006 	case SM_SEI_SO_OVERFLOW:
7007 		dev_err_ratelimited(hdev->dev,
7008 			"%s SEI Error: SOB Group %u overflow/underflow",
7009 			gaudi_sync_manager_names[index],
7010 			le32_to_cpu(sei_data->sei_log));
7011 		break;
7012 	case SM_SEI_LBW_4B_UNALIGNED:
7013 		dev_err_ratelimited(hdev->dev,
7014 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7015 			gaudi_sync_manager_names[index],
7016 			le32_to_cpu(sei_data->sei_log));
7017 		break;
7018 	case SM_SEI_AXI_RESPONSE_ERR:
7019 		dev_err_ratelimited(hdev->dev,
7020 			"%s SEI Error: AXI ID %u response error",
7021 			gaudi_sync_manager_names[index],
7022 			le32_to_cpu(sei_data->sei_log));
7023 		break;
7024 	default:
7025 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7026 				le32_to_cpu(sei_data->sei_log));
7027 		break;
7028 	}
7029 }
7030 
7031 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7032 		struct hl_eq_ecc_data *ecc_data)
7033 {
7034 	struct ecc_info_extract_params params;
7035 	u64 ecc_address = 0, ecc_syndrom = 0;
7036 	u8 index, memory_wrapper_idx = 0;
7037 	bool extract_info_from_fw;
7038 	int rc;
7039 
7040 	if (hdev->asic_prop.fw_security_enabled) {
7041 		extract_info_from_fw = true;
7042 		goto extract_ecc_info;
7043 	}
7044 
7045 	switch (event_type) {
7046 	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7047 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7048 		extract_info_from_fw = true;
7049 		break;
7050 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7051 		index = event_type - GAUDI_EVENT_TPC0_SERR;
7052 		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7053 		params.num_memories = 90;
7054 		params.derr = false;
7055 		extract_info_from_fw = false;
7056 		break;
7057 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7058 		index = event_type - GAUDI_EVENT_TPC0_DERR;
7059 		params.block_address =
7060 			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7061 		params.num_memories = 90;
7062 		params.derr = true;
7063 		extract_info_from_fw = false;
7064 		break;
7065 	case GAUDI_EVENT_MME0_ACC_SERR:
7066 	case GAUDI_EVENT_MME1_ACC_SERR:
7067 	case GAUDI_EVENT_MME2_ACC_SERR:
7068 	case GAUDI_EVENT_MME3_ACC_SERR:
7069 		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7070 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7071 		params.num_memories = 128;
7072 		params.derr = false;
7073 		extract_info_from_fw = false;
7074 		break;
7075 	case GAUDI_EVENT_MME0_ACC_DERR:
7076 	case GAUDI_EVENT_MME1_ACC_DERR:
7077 	case GAUDI_EVENT_MME2_ACC_DERR:
7078 	case GAUDI_EVENT_MME3_ACC_DERR:
7079 		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7080 		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7081 		params.num_memories = 128;
7082 		params.derr = true;
7083 		extract_info_from_fw = false;
7084 		break;
7085 	case GAUDI_EVENT_MME0_SBAB_SERR:
7086 	case GAUDI_EVENT_MME1_SBAB_SERR:
7087 	case GAUDI_EVENT_MME2_SBAB_SERR:
7088 	case GAUDI_EVENT_MME3_SBAB_SERR:
7089 		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7090 		params.block_address =
7091 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7092 		params.num_memories = 33;
7093 		params.derr = false;
7094 		extract_info_from_fw = false;
7095 		break;
7096 	case GAUDI_EVENT_MME0_SBAB_DERR:
7097 	case GAUDI_EVENT_MME1_SBAB_DERR:
7098 	case GAUDI_EVENT_MME2_SBAB_DERR:
7099 	case GAUDI_EVENT_MME3_SBAB_DERR:
7100 		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7101 		params.block_address =
7102 			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7103 		params.num_memories = 33;
7104 		params.derr = true;
7105 		extract_info_from_fw = false;
7106 		break;
7107 	default:
7108 		return;
7109 	}
7110 
7111 extract_ecc_info:
7112 	if (extract_info_from_fw) {
7113 		ecc_address = le64_to_cpu(ecc_data->ecc_address);
7114 		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7115 		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7116 	} else {
7117 		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
7118 				&ecc_syndrom, &memory_wrapper_idx);
7119 		if (rc)
7120 			return;
7121 	}
7122 
7123 	dev_err(hdev->dev,
7124 		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7125 		ecc_address, ecc_syndrom, memory_wrapper_idx);
7126 }
7127 
7128 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7129 {
7130 	u64 qman_base;
7131 	char desc[32];
7132 	u32 qid_base;
7133 	u8 index;
7134 
7135 	switch (event_type) {
7136 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7137 		index = event_type - GAUDI_EVENT_TPC0_QM;
7138 		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7139 		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7140 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7141 		break;
7142 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7143 		if (event_type == GAUDI_EVENT_MME0_QM) {
7144 			index = 0;
7145 			qid_base = GAUDI_QUEUE_ID_MME_0_0;
7146 		} else { /* event_type == GAUDI_EVENT_MME2_QM */
7147 			index = 2;
7148 			qid_base = GAUDI_QUEUE_ID_MME_1_0;
7149 		}
7150 		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7151 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7152 		break;
7153 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7154 		index = event_type - GAUDI_EVENT_DMA0_QM;
7155 		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7156 		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7157 		if (index > 1)
7158 			qid_base++;
7159 		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7160 		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7161 		break;
7162 	case GAUDI_EVENT_NIC0_QM0:
7163 		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7164 		qman_base = mmNIC0_QM0_BASE;
7165 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7166 		break;
7167 	case GAUDI_EVENT_NIC0_QM1:
7168 		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7169 		qman_base = mmNIC0_QM1_BASE;
7170 		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7171 		break;
7172 	case GAUDI_EVENT_NIC1_QM0:
7173 		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7174 		qman_base = mmNIC1_QM0_BASE;
7175 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7176 		break;
7177 	case GAUDI_EVENT_NIC1_QM1:
7178 		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7179 		qman_base = mmNIC1_QM1_BASE;
7180 		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7181 		break;
7182 	case GAUDI_EVENT_NIC2_QM0:
7183 		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7184 		qman_base = mmNIC2_QM0_BASE;
7185 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7186 		break;
7187 	case GAUDI_EVENT_NIC2_QM1:
7188 		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7189 		qman_base = mmNIC2_QM1_BASE;
7190 		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7191 		break;
7192 	case GAUDI_EVENT_NIC3_QM0:
7193 		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7194 		qman_base = mmNIC3_QM0_BASE;
7195 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7196 		break;
7197 	case GAUDI_EVENT_NIC3_QM1:
7198 		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7199 		qman_base = mmNIC3_QM1_BASE;
7200 		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7201 		break;
7202 	case GAUDI_EVENT_NIC4_QM0:
7203 		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7204 		qman_base = mmNIC4_QM0_BASE;
7205 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7206 		break;
7207 	case GAUDI_EVENT_NIC4_QM1:
7208 		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7209 		qman_base = mmNIC4_QM1_BASE;
7210 		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7211 		break;
7212 	default:
7213 		return;
7214 	}
7215 
7216 	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7217 }
7218 
7219 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7220 					bool check_razwi, u64 *event_mask)
7221 {
7222 	bool is_read = false, is_write = false;
7223 	u16 engine_id[2], num_of_razwi_eng = 0;
7224 	char desc[64] = "";
7225 	u64 razwi_addr = 0;
7226 	u8 razwi_flags = 0;
7227 
7228 	/*
7229 	 * Init engine id by default as not valid and only if razwi initiated from engine with
7230 	 * engine id it will get valid value.
7231 	 */
7232 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7233 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7234 
7235 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7236 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7237 		event_type, desc);
7238 
7239 	if (check_razwi) {
7240 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7241 						&is_write);
7242 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7243 
7244 		if (is_read)
7245 			razwi_flags |= HL_RAZWI_READ;
7246 		if (is_write)
7247 			razwi_flags |= HL_RAZWI_WRITE;
7248 
7249 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7250 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7251 				num_of_razwi_eng = 2;
7252 			else
7253 				num_of_razwi_eng = 1;
7254 		}
7255 
7256 		if (razwi_flags)
7257 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7258 					razwi_flags, event_mask);
7259 	}
7260 }
7261 
7262 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7263 					struct cpucp_pkt_sync_err *sync_err)
7264 {
7265 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7266 
7267 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7268 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7269 }
7270 
7271 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7272 					struct hl_eq_fw_alive *fw_alive)
7273 {
7274 	dev_err(hdev->dev,
7275 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7276 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7277 		le32_to_cpu(fw_alive->process_id),
7278 		le32_to_cpu(fw_alive->thread_id),
7279 		le64_to_cpu(fw_alive->uptime_seconds));
7280 }
7281 
7282 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7283 						void *data)
7284 {
7285 	char desc[64] = "", *type;
7286 	struct eq_nic_sei_event *eq_nic_sei = data;
7287 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7288 
7289 	switch (eq_nic_sei->axi_error_cause) {
7290 	case RXB:
7291 		type = "RXB";
7292 		break;
7293 	case RXE:
7294 		type = "RXE";
7295 		break;
7296 	case TXS:
7297 		type = "TXS";
7298 		break;
7299 	case TXE:
7300 		type = "TXE";
7301 		break;
7302 	case QPC_RESP:
7303 		type = "QPC_RESP";
7304 		break;
7305 	case NON_AXI_ERR:
7306 		type = "NON_AXI_ERR";
7307 		break;
7308 	case TMR:
7309 		type = "TMR";
7310 		break;
7311 	default:
7312 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7313 			eq_nic_sei->axi_error_cause);
7314 		type = "N/A";
7315 		break;
7316 	}
7317 
7318 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7319 			eq_nic_sei->id);
7320 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7321 		event_type, desc);
7322 }
7323 
7324 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7325 {
7326 	/* GAUDI doesn't support any reset except hard-reset */
7327 	return -EPERM;
7328 }
7329 
7330 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7331 			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7332 {
7333 	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7334 	int rc = 0;
7335 
7336 	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7337 					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7338 		if (!hbm_ecc_data) {
7339 			dev_err(hdev->dev, "No FW ECC data");
7340 			return 0;
7341 		}
7342 
7343 		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7344 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7345 		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7346 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7347 		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7348 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7349 		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7350 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7351 		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7352 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353 		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7354 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355 		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7356 				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357 
7358 		dev_err(hdev->dev,
7359 			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7360 			device, ch, wr_par, rd_par, ca_par, serr, derr);
7361 		dev_err(hdev->dev,
7362 			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7363 			device, ch, hbm_ecc_data->first_addr, type,
7364 			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7365 			hbm_ecc_data->dec_cnt);
7366 		return 0;
7367 	}
7368 
7369 	if (hdev->asic_prop.fw_security_enabled) {
7370 		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7371 		return 0;
7372 	}
7373 
7374 	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7375 	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7376 		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7377 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7378 		if (val) {
7379 			rc = -EIO;
7380 			dev_err(hdev->dev,
7381 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7382 				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7383 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7384 				(val >> 4) & 0x1);
7385 
7386 			val2 = RREG32(base + ch * 0x1000 + 0x060);
7387 			dev_err(hdev->dev,
7388 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7389 				device, ch * 2,
7390 				RREG32(base + ch * 0x1000 + 0x064),
7391 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7392 				(val2 & 0xFF0000) >> 16,
7393 				(val2 & 0xFF000000) >> 24);
7394 		}
7395 
7396 		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7397 		val = (val & 0xFF) | ((val >> 8) & 0xFF);
7398 		if (val) {
7399 			rc = -EIO;
7400 			dev_err(hdev->dev,
7401 				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7402 				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7403 				(val >> 2) & 0x1, (val >> 3) & 0x1,
7404 				(val >> 4) & 0x1);
7405 
7406 			val2 = RREG32(base + ch * 0x1000 + 0x070);
7407 			dev_err(hdev->dev,
7408 				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7409 				device, ch * 2 + 1,
7410 				RREG32(base + ch * 0x1000 + 0x074),
7411 				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7412 				(val2 & 0xFF0000) >> 16,
7413 				(val2 & 0xFF000000) >> 24);
7414 		}
7415 
7416 		/* Clear interrupts */
7417 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7418 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7419 		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7420 		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7421 		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7422 		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7423 	}
7424 
7425 	val  = RREG32(base + 0x8F30);
7426 	val2 = RREG32(base + 0x8F34);
7427 	if (val | val2) {
7428 		rc = -EIO;
7429 		dev_err(hdev->dev,
7430 			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7431 			device, val, val2);
7432 	}
7433 	val  = RREG32(base + 0x8F40);
7434 	val2 = RREG32(base + 0x8F44);
7435 	if (val | val2) {
7436 		rc = -EIO;
7437 		dev_err(hdev->dev,
7438 			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7439 			device, val, val2);
7440 	}
7441 
7442 	return rc;
7443 }
7444 
7445 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7446 {
7447 	switch (hbm_event_type) {
7448 	case GAUDI_EVENT_HBM0_SPI_0:
7449 	case GAUDI_EVENT_HBM0_SPI_1:
7450 		return 0;
7451 	case GAUDI_EVENT_HBM1_SPI_0:
7452 	case GAUDI_EVENT_HBM1_SPI_1:
7453 		return 1;
7454 	case GAUDI_EVENT_HBM2_SPI_0:
7455 	case GAUDI_EVENT_HBM2_SPI_1:
7456 		return 2;
7457 	case GAUDI_EVENT_HBM3_SPI_0:
7458 	case GAUDI_EVENT_HBM3_SPI_1:
7459 		return 3;
7460 	default:
7461 		break;
7462 	}
7463 
7464 	/* Should never happen */
7465 	return 0;
7466 }
7467 
7468 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7469 					char *interrupt_name)
7470 {
7471 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7472 	bool soft_reset_required = false;
7473 
7474 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7475 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7476 
7477 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7478 		if (tpc_interrupts_cause & BIT(i)) {
7479 			dev_err_ratelimited(hdev->dev,
7480 					"TPC%d_%s interrupt cause: %s\n",
7481 					tpc_id, interrupt_name,
7482 					gaudi_tpc_interrupts_cause[i]);
7483 			/* If this is QM error, we need to soft-reset */
7484 			if (i == 15)
7485 				soft_reset_required = true;
7486 		}
7487 
7488 	/* Clear interrupts */
7489 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7490 
7491 	return soft_reset_required;
7492 }
7493 
7494 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7495 {
7496 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7497 }
7498 
7499 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7500 {
7501 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7502 }
7503 
7504 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7505 {
7506 	ktime_t zero_time = ktime_set(0, 0);
7507 
7508 	mutex_lock(&hdev->clk_throttling.lock);
7509 
7510 	switch (event_type) {
7511 	case GAUDI_EVENT_FIX_POWER_ENV_S:
7512 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7513 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7514 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7515 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7516 		dev_info_ratelimited(hdev->dev,
7517 			"Clock throttling due to power consumption\n");
7518 		break;
7519 
7520 	case GAUDI_EVENT_FIX_POWER_ENV_E:
7521 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7522 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7523 		dev_info_ratelimited(hdev->dev,
7524 			"Power envelop is safe, back to optimal clock\n");
7525 		break;
7526 
7527 	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7528 		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7529 		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7530 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7531 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7532 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7533 		dev_info_ratelimited(hdev->dev,
7534 			"Clock throttling due to overheating\n");
7535 		break;
7536 
7537 	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7538 		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7539 		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7540 		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7541 		dev_info_ratelimited(hdev->dev,
7542 			"Thermal envelop is safe, back to optimal clock\n");
7543 		break;
7544 
7545 	default:
7546 		dev_err(hdev->dev, "Received invalid clock change event %d\n",
7547 			event_type);
7548 		break;
7549 	}
7550 
7551 	mutex_unlock(&hdev->clk_throttling.lock);
7552 }
7553 
7554 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7555 {
7556 	struct gaudi_device *gaudi = hdev->asic_specific;
7557 	struct hl_info_fw_err_info fw_err_info;
7558 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7559 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7560 	u32 fw_fatal_err_flag = 0, flags = 0;
7561 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7562 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7563 	bool reset_required, reset_direct = false;
7564 	u8 cause;
7565 	int rc;
7566 
7567 	if (event_type >= GAUDI_EVENT_SIZE) {
7568 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7569 				event_type, GAUDI_EVENT_SIZE - 1);
7570 		return;
7571 	}
7572 
7573 	gaudi->events_stat[event_type]++;
7574 	gaudi->events_stat_aggregate[event_type]++;
7575 
7576 	switch (event_type) {
7577 	case GAUDI_EVENT_PCIE_CORE_DERR:
7578 	case GAUDI_EVENT_PCIE_IF_DERR:
7579 	case GAUDI_EVENT_PCIE_PHY_DERR:
7580 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7581 	case GAUDI_EVENT_MME0_ACC_DERR:
7582 	case GAUDI_EVENT_MME0_SBAB_DERR:
7583 	case GAUDI_EVENT_MME1_ACC_DERR:
7584 	case GAUDI_EVENT_MME1_SBAB_DERR:
7585 	case GAUDI_EVENT_MME2_ACC_DERR:
7586 	case GAUDI_EVENT_MME2_SBAB_DERR:
7587 	case GAUDI_EVENT_MME3_ACC_DERR:
7588 	case GAUDI_EVENT_MME3_SBAB_DERR:
7589 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7590 		fallthrough;
7591 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7592 	case GAUDI_EVENT_PSOC_MEM_DERR:
7593 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7594 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7595 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7596 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7597 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7598 	case GAUDI_EVENT_MMU_DERR:
7599 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7600 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7601 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7602 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7603 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7604 		goto reset_device;
7605 
7606 	case GAUDI_EVENT_GIC500:
7607 	case GAUDI_EVENT_AXI_ECC:
7608 	case GAUDI_EVENT_L2_RAM_ECC:
7609 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7610 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7611 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7612 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7613 		goto reset_device;
7614 
7615 	case GAUDI_EVENT_HBM0_SPI_0:
7616 	case GAUDI_EVENT_HBM1_SPI_0:
7617 	case GAUDI_EVENT_HBM2_SPI_0:
7618 	case GAUDI_EVENT_HBM3_SPI_0:
7619 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7620 		gaudi_hbm_read_interrupts(hdev,
7621 				gaudi_hbm_event_to_dev(event_type),
7622 				&eq_entry->hbm_ecc_data);
7623 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7624 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7625 		goto reset_device;
7626 
7627 	case GAUDI_EVENT_HBM0_SPI_1:
7628 	case GAUDI_EVENT_HBM1_SPI_1:
7629 	case GAUDI_EVENT_HBM2_SPI_1:
7630 	case GAUDI_EVENT_HBM3_SPI_1:
7631 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7632 		gaudi_hbm_read_interrupts(hdev,
7633 				gaudi_hbm_event_to_dev(event_type),
7634 				&eq_entry->hbm_ecc_data);
7635 		hl_fw_unmask_irq(hdev, event_type);
7636 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7637 		break;
7638 
7639 	case GAUDI_EVENT_TPC0_DEC:
7640 	case GAUDI_EVENT_TPC1_DEC:
7641 	case GAUDI_EVENT_TPC2_DEC:
7642 	case GAUDI_EVENT_TPC3_DEC:
7643 	case GAUDI_EVENT_TPC4_DEC:
7644 	case GAUDI_EVENT_TPC5_DEC:
7645 	case GAUDI_EVENT_TPC6_DEC:
7646 	case GAUDI_EVENT_TPC7_DEC:
7647 		/* In TPC DEC event, notify on TPC assertion. While there isn't
7648 		 * a specific event for assertion yet, the FW generates TPC DEC event.
7649 		 * The SW upper layer will inspect an internal mapped area to indicate
7650 		 * if the event is a TPC Assertion or a "real" TPC DEC.
7651 		 */
7652 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7653 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7654 		reset_required = gaudi_tpc_read_interrupts(hdev,
7655 					tpc_dec_event_to_tpc_id(event_type),
7656 					"AXI_SLV_DEC_Error");
7657 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7658 		if (reset_required) {
7659 			dev_err(hdev->dev, "reset required due to %s\n",
7660 				gaudi_irq_map_table[event_type].name);
7661 
7662 			reset_direct = true;
7663 			goto reset_device;
7664 		} else {
7665 			hl_fw_unmask_irq(hdev, event_type);
7666 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7667 		}
7668 		break;
7669 
7670 	case GAUDI_EVENT_TPC0_KRN_ERR:
7671 	case GAUDI_EVENT_TPC1_KRN_ERR:
7672 	case GAUDI_EVENT_TPC2_KRN_ERR:
7673 	case GAUDI_EVENT_TPC3_KRN_ERR:
7674 	case GAUDI_EVENT_TPC4_KRN_ERR:
7675 	case GAUDI_EVENT_TPC5_KRN_ERR:
7676 	case GAUDI_EVENT_TPC6_KRN_ERR:
7677 	case GAUDI_EVENT_TPC7_KRN_ERR:
7678 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7679 		reset_required = gaudi_tpc_read_interrupts(hdev,
7680 					tpc_krn_event_to_tpc_id(event_type),
7681 					"KRN_ERR");
7682 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7683 		if (reset_required) {
7684 			dev_err(hdev->dev, "reset required due to %s\n",
7685 				gaudi_irq_map_table[event_type].name);
7686 
7687 			reset_direct = true;
7688 			goto reset_device;
7689 		} else {
7690 			hl_fw_unmask_irq(hdev, event_type);
7691 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7692 		}
7693 		break;
7694 
7695 	case GAUDI_EVENT_PCIE_CORE_SERR:
7696 	case GAUDI_EVENT_PCIE_IF_SERR:
7697 	case GAUDI_EVENT_PCIE_PHY_SERR:
7698 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7699 	case GAUDI_EVENT_MME0_ACC_SERR:
7700 	case GAUDI_EVENT_MME0_SBAB_SERR:
7701 	case GAUDI_EVENT_MME1_ACC_SERR:
7702 	case GAUDI_EVENT_MME1_SBAB_SERR:
7703 	case GAUDI_EVENT_MME2_ACC_SERR:
7704 	case GAUDI_EVENT_MME2_SBAB_SERR:
7705 	case GAUDI_EVENT_MME3_ACC_SERR:
7706 	case GAUDI_EVENT_MME3_SBAB_SERR:
7707 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7708 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7709 	case GAUDI_EVENT_PSOC_MEM_SERR:
7710 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7711 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7712 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7713 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7714 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7715 		fallthrough;
7716 	case GAUDI_EVENT_MMU_SERR:
7717 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7718 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7719 		hl_fw_unmask_irq(hdev, event_type);
7720 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7721 		break;
7722 
7723 	case GAUDI_EVENT_PCIE_DEC:
7724 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7725 	case GAUDI_EVENT_PSOC_AXI_DEC:
7726 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7727 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7728 		hl_fw_unmask_irq(hdev, event_type);
7729 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7730 		break;
7731 
7732 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7733 	case GAUDI_EVENT_MMU_WR_PERM:
7734 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7735 		hl_fw_unmask_irq(hdev, event_type);
7736 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7737 		break;
7738 
7739 	case GAUDI_EVENT_MME0_WBC_RSP:
7740 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7741 	case GAUDI_EVENT_MME1_WBC_RSP:
7742 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7743 	case GAUDI_EVENT_MME2_WBC_RSP:
7744 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7745 	case GAUDI_EVENT_MME3_WBC_RSP:
7746 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7747 	case GAUDI_EVENT_RAZWI_OR_ADC:
7748 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7749 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7750 		fallthrough;
7751 	case GAUDI_EVENT_NIC0_QM0:
7752 	case GAUDI_EVENT_NIC0_QM1:
7753 	case GAUDI_EVENT_NIC1_QM0:
7754 	case GAUDI_EVENT_NIC1_QM1:
7755 	case GAUDI_EVENT_NIC2_QM0:
7756 	case GAUDI_EVENT_NIC2_QM1:
7757 	case GAUDI_EVENT_NIC3_QM0:
7758 	case GAUDI_EVENT_NIC3_QM1:
7759 	case GAUDI_EVENT_NIC4_QM0:
7760 	case GAUDI_EVENT_NIC4_QM1:
7761 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7762 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7763 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7764 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7765 		hl_fw_unmask_irq(hdev, event_type);
7766 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7767 		break;
7768 
7769 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7770 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7771 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7772 		goto reset_device;
7773 
7774 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7775 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7776 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7777 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7778 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7779 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7780 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7781 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7782 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7783 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7784 		hl_fw_unmask_irq(hdev, event_type);
7785 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7786 		break;
7787 
7788 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7789 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7790 		hl_fw_unmask_irq(hdev, event_type);
7791 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7792 		break;
7793 
7794 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7795 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7796 		gaudi_print_sm_sei_info(hdev, event_type,
7797 					&eq_entry->sm_sei_data);
7798 		rc = hl_state_dump(hdev);
7799 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7800 		if (rc)
7801 			dev_err(hdev->dev,
7802 				"Error during system state dump %d\n", rc);
7803 		hl_fw_unmask_irq(hdev, event_type);
7804 		break;
7805 
7806 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7807 		break;
7808 
7809 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7810 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7811 		hl_fw_unmask_irq(hdev, event_type);
7812 		break;
7813 
7814 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7815 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7816 		dev_err(hdev->dev,
7817 			"Received high temp H/W interrupt %d (cause %d)\n",
7818 			event_type, cause);
7819 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7820 		break;
7821 
7822 	case GAUDI_EVENT_DEV_RESET_REQ:
7823 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7824 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7825 		goto reset_device;
7826 
7827 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7828 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7829 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7830 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7831 		goto reset_device;
7832 
7833 	case GAUDI_EVENT_FW_ALIVE_S:
7834 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7835 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7836 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7837 		fw_err_info.event_id = event_type;
7838 		fw_err_info.event_mask = &event_mask;
7839 		hl_handle_fw_err(hdev, &fw_err_info);
7840 		goto reset_device;
7841 
7842 	default:
7843 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7844 				event_type);
7845 		break;
7846 	}
7847 
7848 	if (event_mask)
7849 		hl_notifier_event_send_all(hdev, event_mask);
7850 
7851 	return;
7852 
7853 reset_device:
7854 	reset_required = true;
7855 
7856 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7857 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7858 
7859 		/* notify on device unavailable while the reset triggered by fw */
7860 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7861 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7862 	} else if (hdev->hard_reset_on_fw_events) {
7863 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7864 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7865 	} else {
7866 		reset_required = false;
7867 	}
7868 
7869 	if (reset_required) {
7870 		/* escalate general hw errors to critical/fatal error */
7871 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7872 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7873 
7874 		hl_device_cond_reset(hdev, flags, event_mask);
7875 	} else {
7876 		hl_fw_unmask_irq(hdev, event_type);
7877 		/* Notification on occurred event needs to be sent although reset is not executed */
7878 		if (event_mask)
7879 			hl_notifier_event_send_all(hdev, event_mask);
7880 	}
7881 }
7882 
7883 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7884 {
7885 	struct gaudi_device *gaudi = hdev->asic_specific;
7886 
7887 	if (aggregate) {
7888 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7889 		return gaudi->events_stat_aggregate;
7890 	}
7891 
7892 	*size = (u32) sizeof(gaudi->events_stat);
7893 	return gaudi->events_stat;
7894 }
7895 
7896 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7897 {
7898 	struct gaudi_device *gaudi = hdev->asic_specific;
7899 	u32 status, timeout_usec;
7900 	int rc;
7901 
7902 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7903 		hdev->reset_info.hard_reset_pending)
7904 		return 0;
7905 
7906 	if (hdev->pldm)
7907 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7908 	else
7909 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7910 
7911 	/* L0 & L1 invalidation */
7912 	WREG32(mmSTLB_INV_PS, 3);
7913 	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7914 	WREG32(mmSTLB_INV_PS, 2);
7915 
7916 	rc = hl_poll_timeout(
7917 		hdev,
7918 		mmSTLB_INV_PS,
7919 		status,
7920 		!status,
7921 		1000,
7922 		timeout_usec);
7923 
7924 	WREG32(mmSTLB_INV_SET, 0);
7925 
7926 	return rc;
7927 }
7928 
7929 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7930 						bool is_hard, u32 flags,
7931 						u32 asid, u64 va, u64 size)
7932 {
7933 	/* Treat as invalidate all because there is no range invalidation
7934 	 * in Gaudi
7935 	 */
7936 	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7937 }
7938 
7939 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7940 {
7941 	u32 status, timeout_usec;
7942 	int rc;
7943 
7944 	if (hdev->pldm)
7945 		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7946 	else
7947 		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7948 
7949 	WREG32(MMU_ASID, asid);
7950 	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7951 	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7952 	WREG32(MMU_BUSY, 0x80000000);
7953 
7954 	rc = hl_poll_timeout(
7955 		hdev,
7956 		MMU_BUSY,
7957 		status,
7958 		!(status & 0x80000000),
7959 		1000,
7960 		timeout_usec);
7961 
7962 	if (rc) {
7963 		dev_err(hdev->dev,
7964 			"Timeout during MMU hop0 config of asid %d\n", asid);
7965 		return rc;
7966 	}
7967 
7968 	return 0;
7969 }
7970 
7971 static int gaudi_send_heartbeat(struct hl_device *hdev)
7972 {
7973 	struct gaudi_device *gaudi = hdev->asic_specific;
7974 
7975 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7976 		return 0;
7977 
7978 	return hl_fw_send_heartbeat(hdev);
7979 }
7980 
7981 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7982 {
7983 	struct gaudi_device *gaudi = hdev->asic_specific;
7984 	struct asic_fixed_properties *prop = &hdev->asic_prop;
7985 	int rc;
7986 
7987 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7988 		return 0;
7989 
7990 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7991 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7992 					mmCPU_BOOT_ERR1);
7993 	if (rc)
7994 		return rc;
7995 
7996 	if (!strlen(prop->cpucp_info.card_name))
7997 		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
7998 				CARD_NAME_MAX_LEN);
7999 
8000 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8001 
8002 	set_default_power_values(hdev);
8003 
8004 	return 0;
8005 }
8006 
8007 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8008 		struct engines_data *e)
8009 {
8010 	struct gaudi_device *gaudi = hdev->asic_specific;
8011 	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8012 	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8013 	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8014 	unsigned long *mask = (unsigned long *)mask_arr;
8015 	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8016 	bool is_idle = true, is_eng_idle, is_slave;
8017 	u64 offset;
8018 	int i, dma_id, port;
8019 
8020 	if (e)
8021 		hl_engine_data_sprintf(e,
8022 			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
8023 			"---  -------  ------------  ----------  -------------\n");
8024 
8025 	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8026 		dma_id = gaudi_dma_assignment[i];
8027 		offset = dma_id * DMA_QMAN_OFFSET;
8028 
8029 		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8030 		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8031 		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8032 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8033 				IS_DMA_IDLE(dma_core_sts0);
8034 		is_idle &= is_eng_idle;
8035 
8036 		if (mask && !is_eng_idle)
8037 			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8038 		if (e)
8039 			hl_engine_data_sprintf(e, fmt, dma_id,
8040 				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8041 				qm_cgm_sts, dma_core_sts0);
8042 	}
8043 
8044 	if (e)
8045 		hl_engine_data_sprintf(e,
8046 			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
8047 			"---  -------  ------------  ----------  ----------\n");
8048 
8049 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8050 		offset = i * TPC_QMAN_OFFSET;
8051 		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8052 		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8053 		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8054 		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8055 				IS_TPC_IDLE(tpc_cfg_sts);
8056 		is_idle &= is_eng_idle;
8057 
8058 		if (mask && !is_eng_idle)
8059 			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8060 		if (e)
8061 			hl_engine_data_sprintf(e, fmt, i,
8062 				is_eng_idle ? "Y" : "N",
8063 				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8064 	}
8065 
8066 	if (e)
8067 		hl_engine_data_sprintf(e,
8068 			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
8069 			"---  -------  ------------  ----------  -----------\n");
8070 
8071 	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8072 		offset = i * MME_QMAN_OFFSET;
8073 		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8074 		is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8075 
8076 		/* MME 1 & 3 are slaves, no need to check their QMANs */
8077 		is_slave = i % 2;
8078 		if (!is_slave) {
8079 			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8080 			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8081 			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8082 		}
8083 
8084 		is_idle &= is_eng_idle;
8085 
8086 		if (mask && !is_eng_idle)
8087 			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8088 		if (e) {
8089 			if (!is_slave)
8090 				hl_engine_data_sprintf(e, fmt, i,
8091 					is_eng_idle ? "Y" : "N",
8092 					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8093 			else
8094 				hl_engine_data_sprintf(e, mme_slave_fmt, i,
8095 					is_eng_idle ? "Y" : "N", "-",
8096 					"-", mme_arch_sts);
8097 		}
8098 	}
8099 
8100 	if (e)
8101 		hl_engine_data_sprintf(e,
8102 				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
8103 				"---  -------  ------------  ----------\n");
8104 
8105 	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8106 		offset = i * NIC_MACRO_QMAN_OFFSET;
8107 		port = 2 * i;
8108 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8109 			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8110 			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8111 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8112 			is_idle &= is_eng_idle;
8113 
8114 			if (mask && !is_eng_idle)
8115 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8116 			if (e)
8117 				hl_engine_data_sprintf(e, nic_fmt, port,
8118 						is_eng_idle ? "Y" : "N",
8119 						qm_glbl_sts0, qm_cgm_sts);
8120 		}
8121 
8122 		port = 2 * i + 1;
8123 		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8124 			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8125 			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8126 			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8127 			is_idle &= is_eng_idle;
8128 
8129 			if (mask && !is_eng_idle)
8130 				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8131 			if (e)
8132 				hl_engine_data_sprintf(e, nic_fmt, port,
8133 						is_eng_idle ? "Y" : "N",
8134 						qm_glbl_sts0, qm_cgm_sts);
8135 		}
8136 	}
8137 
8138 	if (e)
8139 		hl_engine_data_sprintf(e, "\n");
8140 
8141 	return is_idle;
8142 }
8143 
8144 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8145 	__acquires(&gaudi->hw_queues_lock)
8146 {
8147 	struct gaudi_device *gaudi = hdev->asic_specific;
8148 
8149 	spin_lock(&gaudi->hw_queues_lock);
8150 }
8151 
8152 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8153 	__releases(&gaudi->hw_queues_lock)
8154 {
8155 	struct gaudi_device *gaudi = hdev->asic_specific;
8156 
8157 	spin_unlock(&gaudi->hw_queues_lock);
8158 }
8159 
8160 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8161 {
8162 	return hdev->pdev->device;
8163 }
8164 
8165 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8166 				size_t max_size)
8167 {
8168 	struct gaudi_device *gaudi = hdev->asic_specific;
8169 
8170 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8171 		return 0;
8172 
8173 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8174 }
8175 
8176 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8177 {
8178 	struct gaudi_device *gaudi = hdev->asic_specific;
8179 
8180 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8181 		return 0;
8182 
8183 	return hl_fw_get_monitor_dump(hdev, data);
8184 }
8185 
8186 /*
8187  * this function should be used only during initialization and/or after reset,
8188  * when there are no active users.
8189  */
8190 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
8191 {
8192 	u64 kernel_timeout;
8193 	u32 status, offset;
8194 	int rc;
8195 
8196 	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8197 
8198 	if (hdev->pldm)
8199 		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8200 	else
8201 		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8202 
8203 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8204 			lower_32_bits(tpc_kernel));
8205 	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8206 			upper_32_bits(tpc_kernel));
8207 
8208 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8209 			lower_32_bits(tpc_kernel));
8210 	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8211 			upper_32_bits(tpc_kernel));
8212 	/* set a valid LUT pointer, content is of no significance */
8213 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8214 			lower_32_bits(tpc_kernel));
8215 	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8216 			upper_32_bits(tpc_kernel));
8217 
8218 	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8219 			lower_32_bits(CFG_BASE +
8220 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8221 
8222 	WREG32(mmTPC0_CFG_TPC_CMD + offset,
8223 			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8224 			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8225 	/* wait a bit for the engine to start executing */
8226 	usleep_range(1000, 1500);
8227 
8228 	/* wait until engine has finished executing */
8229 	rc = hl_poll_timeout(
8230 		hdev,
8231 		mmTPC0_CFG_STATUS + offset,
8232 		status,
8233 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8234 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8235 		1000,
8236 		kernel_timeout);
8237 
8238 	if (rc) {
8239 		dev_err(hdev->dev,
8240 			"Timeout while waiting for TPC%d icache prefetch\n",
8241 			tpc_id);
8242 		return -EIO;
8243 	}
8244 
8245 	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8246 			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8247 
8248 	/* wait a bit for the engine to start executing */
8249 	usleep_range(1000, 1500);
8250 
8251 	/* wait until engine has finished executing */
8252 	rc = hl_poll_timeout(
8253 		hdev,
8254 		mmTPC0_CFG_STATUS + offset,
8255 		status,
8256 		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8257 				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8258 		1000,
8259 		kernel_timeout);
8260 
8261 	if (rc) {
8262 		dev_err(hdev->dev,
8263 			"Timeout while waiting for TPC%d vector pipe\n",
8264 			tpc_id);
8265 		return -EIO;
8266 	}
8267 
8268 	rc = hl_poll_timeout(
8269 		hdev,
8270 		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8271 		status,
8272 		(status == 0),
8273 		1000,
8274 		kernel_timeout);
8275 
8276 	if (rc) {
8277 		dev_err(hdev->dev,
8278 			"Timeout while waiting for TPC%d kernel to execute\n",
8279 			tpc_id);
8280 		return -EIO;
8281 	}
8282 
8283 	return 0;
8284 }
8285 
8286 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8287 		struct hl_ctx *ctx)
8288 {
8289 	struct gaudi_device *gaudi = hdev->asic_specific;
8290 	int min_alloc_order, rc, collective_cb_size;
8291 
8292 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8293 		return 0;
8294 
8295 	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8296 							HOST_SPACE_INTERNAL_CB_SZ,
8297 							&hdev->internal_cb_pool_dma_addr,
8298 							GFP_KERNEL | __GFP_ZERO);
8299 
8300 	if (!hdev->internal_cb_pool_virt_addr)
8301 		return -ENOMEM;
8302 
8303 	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8304 			sizeof(struct packet_fence);
8305 	min_alloc_order = ilog2(collective_cb_size);
8306 
8307 	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8308 	if (!hdev->internal_cb_pool) {
8309 		dev_err(hdev->dev,
8310 			"Failed to create internal CB pool\n");
8311 		rc = -ENOMEM;
8312 		goto free_internal_cb_pool;
8313 	}
8314 
8315 	rc = gen_pool_add(hdev->internal_cb_pool,
8316 				(uintptr_t) hdev->internal_cb_pool_virt_addr,
8317 				HOST_SPACE_INTERNAL_CB_SZ, -1);
8318 	if (rc) {
8319 		dev_err(hdev->dev,
8320 			"Failed to add memory to internal CB pool\n");
8321 		rc = -EFAULT;
8322 		goto destroy_internal_cb_pool;
8323 	}
8324 
8325 	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8326 			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8327 			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8328 
8329 	if (!hdev->internal_cb_va_base) {
8330 		rc = -ENOMEM;
8331 		goto destroy_internal_cb_pool;
8332 	}
8333 
8334 	mutex_lock(&hdev->mmu_lock);
8335 
8336 	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8337 			hdev->internal_cb_pool_dma_addr,
8338 			HOST_SPACE_INTERNAL_CB_SZ);
8339 	if (rc)
8340 		goto unreserve_internal_cb_pool;
8341 
8342 	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8343 	if (rc)
8344 		goto unmap_internal_cb_pool;
8345 
8346 	mutex_unlock(&hdev->mmu_lock);
8347 
8348 	return 0;
8349 
8350 unmap_internal_cb_pool:
8351 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8352 			HOST_SPACE_INTERNAL_CB_SZ);
8353 unreserve_internal_cb_pool:
8354 	mutex_unlock(&hdev->mmu_lock);
8355 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8356 			HOST_SPACE_INTERNAL_CB_SZ);
8357 destroy_internal_cb_pool:
8358 	gen_pool_destroy(hdev->internal_cb_pool);
8359 free_internal_cb_pool:
8360 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8361 					hdev->internal_cb_pool_dma_addr);
8362 
8363 	return rc;
8364 }
8365 
8366 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8367 		struct hl_ctx *ctx)
8368 {
8369 	struct gaudi_device *gaudi = hdev->asic_specific;
8370 
8371 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8372 		return;
8373 
8374 	mutex_lock(&hdev->mmu_lock);
8375 	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8376 			HOST_SPACE_INTERNAL_CB_SZ);
8377 	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8378 			HOST_SPACE_INTERNAL_CB_SZ);
8379 	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8380 	mutex_unlock(&hdev->mmu_lock);
8381 
8382 	gen_pool_destroy(hdev->internal_cb_pool);
8383 
8384 	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8385 					hdev->internal_cb_pool_dma_addr);
8386 }
8387 
8388 static int gaudi_ctx_init(struct hl_ctx *ctx)
8389 {
8390 	int rc;
8391 
8392 	if (ctx->asid == HL_KERNEL_ASID_ID)
8393 		return 0;
8394 
8395 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8396 	if (rc)
8397 		return rc;
8398 
8399 	rc = gaudi_restore_user_registers(ctx->hdev);
8400 	if (rc)
8401 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8402 
8403 	return rc;
8404 }
8405 
8406 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8407 {
8408 	if (ctx->asid == HL_KERNEL_ASID_ID)
8409 		return;
8410 
8411 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8412 }
8413 
/* No pre-scheduling work is done here for a command submission; always 0. */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8418 
8419 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8420 {
8421 	return gaudi_cq_assignment[cq_idx];
8422 }
8423 
8424 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8425 {
8426 	return sizeof(struct packet_msg_short) +
8427 			sizeof(struct packet_msg_prot) * 2;
8428 }
8429 
8430 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8431 {
8432 	return sizeof(struct packet_msg_short) * 4 +
8433 			sizeof(struct packet_fence) +
8434 			sizeof(struct packet_msg_prot) * 2;
8435 }
8436 
8437 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8438 {
8439 	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8440 }
8441 
8442 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8443 				u32 size, bool eb)
8444 {
8445 	struct hl_cb *cb = (struct hl_cb *) data;
8446 	struct packet_msg_short *pkt;
8447 	u32 value, ctl, pkt_size = sizeof(*pkt);
8448 
8449 	pkt = cb->kernel_address + size;
8450 	memset(pkt, 0, pkt_size);
8451 
8452 	/* Inc by 1, Mode ADD */
8453 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8454 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8455 
8456 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8457 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8458 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8459 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8460 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8461 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8462 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8463 
8464 	pkt->value = cpu_to_le32(value);
8465 	pkt->ctl = cpu_to_le32(ctl);
8466 
8467 	return size + pkt_size;
8468 }
8469 
8470 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8471 					u16 addr)
8472 {
8473 	u32 ctl, pkt_size = sizeof(*pkt);
8474 
8475 	memset(pkt, 0, pkt_size);
8476 
8477 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8478 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8479 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8480 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8481 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8482 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8483 
8484 	pkt->value = cpu_to_le32(value);
8485 	pkt->ctl = cpu_to_le32(ctl);
8486 
8487 	return pkt_size;
8488 }
8489 
8490 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8491 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8492 		u16 sob_val, u16 mon_id)
8493 {
8494 	u64 monitor_base;
8495 	u32 ctl, value, pkt_size = sizeof(*pkt);
8496 	u16 msg_addr_offset;
8497 	u8 mask;
8498 
8499 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8500 		dev_err(hdev->dev,
8501 			"sob_base %u (mask %#x) is not valid\n",
8502 			sob_base, sob_mask);
8503 		return 0;
8504 	}
8505 
8506 	/*
8507 	 * monitor_base should be the content of the base0 address registers,
8508 	 * so it will be added to the msg short offsets
8509 	 */
8510 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8511 
8512 	msg_addr_offset =
8513 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8514 				monitor_base;
8515 
8516 	memset(pkt, 0, pkt_size);
8517 
8518 	/* Monitor config packet: bind the monitor to a sync object */
8519 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8520 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8521 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8522 			0); /* GREATER OR EQUAL*/
8523 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8524 
8525 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8526 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8527 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8528 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8529 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8530 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8531 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8532 
8533 	pkt->value = cpu_to_le32(value);
8534 	pkt->ctl = cpu_to_le32(ctl);
8535 
8536 	return pkt_size;
8537 }
8538 
8539 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8540 {
8541 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8542 
8543 	memset(pkt, 0, pkt_size);
8544 
8545 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8546 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8547 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8548 
8549 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8550 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8551 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8552 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8553 
8554 	pkt->cfg = cpu_to_le32(cfg);
8555 	pkt->ctl = cpu_to_le32(ctl);
8556 
8557 	return pkt_size;
8558 }
8559 
8560 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8561 {
8562 	u32 offset, nic_index;
8563 
8564 	switch (queue_id) {
8565 	case GAUDI_QUEUE_ID_DMA_0_0:
8566 		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8567 		break;
8568 	case GAUDI_QUEUE_ID_DMA_0_1:
8569 		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8570 		break;
8571 	case GAUDI_QUEUE_ID_DMA_0_2:
8572 		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8573 		break;
8574 	case GAUDI_QUEUE_ID_DMA_0_3:
8575 		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8576 		break;
8577 	case GAUDI_QUEUE_ID_DMA_1_0:
8578 		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8579 		break;
8580 	case GAUDI_QUEUE_ID_DMA_1_1:
8581 		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8582 		break;
8583 	case GAUDI_QUEUE_ID_DMA_1_2:
8584 		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8585 		break;
8586 	case GAUDI_QUEUE_ID_DMA_1_3:
8587 		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8588 		break;
8589 	case GAUDI_QUEUE_ID_DMA_5_0:
8590 		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8591 		break;
8592 	case GAUDI_QUEUE_ID_DMA_5_1:
8593 		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8594 		break;
8595 	case GAUDI_QUEUE_ID_DMA_5_2:
8596 		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8597 		break;
8598 	case GAUDI_QUEUE_ID_DMA_5_3:
8599 		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8600 		break;
8601 	case GAUDI_QUEUE_ID_TPC_7_0:
8602 		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8603 		break;
8604 	case GAUDI_QUEUE_ID_TPC_7_1:
8605 		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8606 		break;
8607 	case GAUDI_QUEUE_ID_TPC_7_2:
8608 		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8609 		break;
8610 	case GAUDI_QUEUE_ID_TPC_7_3:
8611 		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8612 		break;
8613 	case GAUDI_QUEUE_ID_NIC_0_0:
8614 	case GAUDI_QUEUE_ID_NIC_1_0:
8615 	case GAUDI_QUEUE_ID_NIC_2_0:
8616 	case GAUDI_QUEUE_ID_NIC_3_0:
8617 	case GAUDI_QUEUE_ID_NIC_4_0:
8618 	case GAUDI_QUEUE_ID_NIC_5_0:
8619 	case GAUDI_QUEUE_ID_NIC_6_0:
8620 	case GAUDI_QUEUE_ID_NIC_7_0:
8621 	case GAUDI_QUEUE_ID_NIC_8_0:
8622 	case GAUDI_QUEUE_ID_NIC_9_0:
8623 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8624 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8625 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8626 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8627 		break;
8628 	case GAUDI_QUEUE_ID_NIC_0_1:
8629 	case GAUDI_QUEUE_ID_NIC_1_1:
8630 	case GAUDI_QUEUE_ID_NIC_2_1:
8631 	case GAUDI_QUEUE_ID_NIC_3_1:
8632 	case GAUDI_QUEUE_ID_NIC_4_1:
8633 	case GAUDI_QUEUE_ID_NIC_5_1:
8634 	case GAUDI_QUEUE_ID_NIC_6_1:
8635 	case GAUDI_QUEUE_ID_NIC_7_1:
8636 	case GAUDI_QUEUE_ID_NIC_8_1:
8637 	case GAUDI_QUEUE_ID_NIC_9_1:
8638 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8639 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8640 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8641 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8642 		break;
8643 	case GAUDI_QUEUE_ID_NIC_0_2:
8644 	case GAUDI_QUEUE_ID_NIC_1_2:
8645 	case GAUDI_QUEUE_ID_NIC_2_2:
8646 	case GAUDI_QUEUE_ID_NIC_3_2:
8647 	case GAUDI_QUEUE_ID_NIC_4_2:
8648 	case GAUDI_QUEUE_ID_NIC_5_2:
8649 	case GAUDI_QUEUE_ID_NIC_6_2:
8650 	case GAUDI_QUEUE_ID_NIC_7_2:
8651 	case GAUDI_QUEUE_ID_NIC_8_2:
8652 	case GAUDI_QUEUE_ID_NIC_9_2:
8653 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8654 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8655 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8656 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8657 		break;
8658 	case GAUDI_QUEUE_ID_NIC_0_3:
8659 	case GAUDI_QUEUE_ID_NIC_1_3:
8660 	case GAUDI_QUEUE_ID_NIC_2_3:
8661 	case GAUDI_QUEUE_ID_NIC_3_3:
8662 	case GAUDI_QUEUE_ID_NIC_4_3:
8663 	case GAUDI_QUEUE_ID_NIC_5_3:
8664 	case GAUDI_QUEUE_ID_NIC_6_3:
8665 	case GAUDI_QUEUE_ID_NIC_7_3:
8666 	case GAUDI_QUEUE_ID_NIC_8_3:
8667 	case GAUDI_QUEUE_ID_NIC_9_3:
8668 		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8669 		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8670 				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8671 				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8672 		break;
8673 	default:
8674 		return -EINVAL;
8675 	}
8676 
8677 	*addr = CFG_BASE + offset;
8678 
8679 	return 0;
8680 }
8681 
8682 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8683 {
8684 	u64 monitor_base;
8685 	u32 size = 0;
8686 	u16 msg_addr_offset;
8687 
8688 	/*
8689 	 * monitor_base should be the content of the base0 address registers,
8690 	 * so it will be added to the msg short offsets
8691 	 */
8692 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8693 
8694 	/* First monitor config packet: low address of the sync */
8695 	msg_addr_offset =
8696 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8697 				monitor_base;
8698 
8699 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8700 					msg_addr_offset);
8701 
8702 	/* Second monitor config packet: high address of the sync */
8703 	msg_addr_offset =
8704 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8705 				monitor_base;
8706 
8707 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8708 					msg_addr_offset);
8709 
8710 	/*
8711 	 * Third monitor config packet: the payload, i.e. what to write when the
8712 	 * sync triggers
8713 	 */
8714 	msg_addr_offset =
8715 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8716 				monitor_base;
8717 
8718 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8719 
8720 	return size;
8721 }
8722 
8723 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8724 				struct hl_gen_wait_properties *prop)
8725 {
8726 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8727 	void *buf = cb->kernel_address;
8728 	u64 fence_addr = 0;
8729 	u32 size = prop->size;
8730 
8731 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8732 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8733 				prop->q_idx);
8734 		return 0;
8735 	}
8736 
8737 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8738 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8739 			prop->sob_mask, prop->sob_val, prop->mon_id);
8740 	size += gaudi_add_fence_pkt(buf + size);
8741 
8742 	return size;
8743 }
8744 
8745 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8746 {
8747 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8748 
8749 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8750 		hw_sob->sob_id);
8751 
8752 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8753 			hw_sob->sob_id * 4, 0);
8754 
8755 	kref_init(&hw_sob->kref);
8756 }
8757 
8758 static u64 gaudi_get_device_time(struct hl_device *hdev)
8759 {
8760 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8761 
8762 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8763 }
8764 
8765 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8766 				u32 *block_size, u32 *block_id)
8767 {
8768 	return -EPERM;
8769 }
8770 
8771 static int gaudi_block_mmap(struct hl_device *hdev,
8772 				struct vm_area_struct *vma,
8773 				u32 block_id, u32 block_size)
8774 {
8775 	return -EPERM;
8776 }
8777 
8778 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8779 {
8780 	struct cpu_dyn_regs *dyn_regs =
8781 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8782 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8783 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8784 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8785 
8786 	WREG32(irq_handler_offset,
8787 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8788 }
8789 
8790 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8791 {
8792 	return -EINVAL;
8793 }
8794 
8795 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8796 {
8797 	switch (pll_idx) {
8798 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8799 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8800 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8801 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8802 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8803 	case HL_GAUDI_MME_PLL: return MME_PLL;
8804 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8805 	case HL_GAUDI_IF_PLL: return IF_PLL;
8806 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8807 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8808 	default: return -EINVAL;
8809 	}
8810 }
8811 
8812 static int gaudi_add_sync_to_engine_map_entry(
8813 	struct hl_sync_to_engine_map *map, u32 reg_value,
8814 	enum hl_sync_engine_type engine_type, u32 engine_id)
8815 {
8816 	struct hl_sync_to_engine_map_entry *entry;
8817 
8818 	/* Reg value represents a partial address of sync object,
8819 	 * it is used as unique identifier. For this we need to
8820 	 * clear the cutoff cfg base bits from the value.
8821 	 */
8822 	if (reg_value == 0 || reg_value == 0xffffffff)
8823 		return 0;
8824 	reg_value -= lower_32_bits(CFG_BASE);
8825 
8826 	/* create a new hash entry */
8827 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8828 	if (!entry)
8829 		return -ENOMEM;
8830 	entry->engine_type = engine_type;
8831 	entry->engine_id = engine_id;
8832 	entry->sync_id = reg_value;
8833 	hash_add(map->tb, &entry->node, reg_value);
8834 
8835 	return 0;
8836 }
8837 
8838 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8839 				struct hl_sync_to_engine_map *map)
8840 {
8841 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8842 	int i, j, rc;
8843 	u32 reg_value;
8844 
8845 	/* Iterate over TPC engines */
8846 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8847 
8848 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8849 					sds->props[SP_NEXT_TPC] * i);
8850 
8851 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8852 							ENGINE_TPC, i);
8853 		if (rc)
8854 			goto free_sync_to_engine_map;
8855 	}
8856 
8857 	/* Iterate over MME engines */
8858 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8859 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8860 
8861 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8862 						sds->props[SP_NEXT_MME] * i +
8863 						j * sizeof(u32));
8864 
8865 			rc = gaudi_add_sync_to_engine_map_entry(
8866 				map, reg_value, ENGINE_MME,
8867 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8868 			if (rc)
8869 				goto free_sync_to_engine_map;
8870 		}
8871 	}
8872 
8873 	/* Iterate over DMA engines */
8874 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8875 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8876 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8877 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8878 							ENGINE_DMA, i);
8879 		if (rc)
8880 			goto free_sync_to_engine_map;
8881 	}
8882 
8883 	return 0;
8884 
8885 free_sync_to_engine_map:
8886 	hl_state_dump_free_sync_to_engine_map(map);
8887 
8888 	return rc;
8889 }
8890 
8891 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8892 {
8893 	return FIELD_GET(
8894 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8895 		mon->status);
8896 }
8897 
8898 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8899 {
8900 	const size_t max_write = 10;
8901 	u32 gid, mask, sob;
8902 	int i, offset;
8903 
8904 	/* Sync object ID is calculated as follows:
8905 	 * (8 * group_id + cleared bits in mask)
8906 	 */
8907 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8908 			mon->arm_data);
8909 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8910 			mon->arm_data);
8911 
8912 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8913 		max_write; mask >>= 1, i++) {
8914 		if (!(mask & 1)) {
8915 			sob = gid * MONITOR_MAX_SOBS + i;
8916 
8917 			if (offset > 0)
8918 				offset += snprintf(sobs + offset, max_write,
8919 							", ");
8920 
8921 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8922 		}
8923 	}
8924 }
8925 
8926 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8927 				struct hl_device *hdev,
8928 				struct hl_mon_state_dump *mon)
8929 {
8930 	const char *name;
8931 	char scratch_buf1[BIN_REG_STRING_SIZE],
8932 		scratch_buf2[BIN_REG_STRING_SIZE];
8933 	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8934 
8935 	name = hl_state_dump_get_monitor_name(hdev, mon);
8936 	if (!name)
8937 		name = "";
8938 
8939 	gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8940 
8941 	return hl_snprintf_resize(
8942 		buf, size, offset,
8943 		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8944 		mon->id, name,
8945 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8946 				mon->arm_data),
8947 		hl_format_as_binary(
8948 			scratch_buf1, sizeof(scratch_buf1),
8949 			FIELD_GET(
8950 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8951 				mon->arm_data)),
8952 		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8953 				mon->arm_data),
8954 		mon->wr_data,
8955 		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8956 		hl_format_as_binary(
8957 			scratch_buf2, sizeof(scratch_buf2),
8958 			FIELD_GET(
8959 				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8960 				mon->status)),
8961 		monitored_sobs);
8962 }
8963 
8964 
8965 static int gaudi_print_fences_single_engine(
8966 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8967 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8968 	size_t *size, size_t *offset)
8969 {
8970 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8971 	int rc = -ENOMEM, i;
8972 	u32 *statuses, *fences;
8973 
8974 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8975 			sizeof(*statuses), GFP_KERNEL);
8976 	if (!statuses)
8977 		goto out;
8978 
8979 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8980 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
8981 			 sizeof(*fences), GFP_KERNEL);
8982 	if (!fences)
8983 		goto free_status;
8984 
8985 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8986 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8987 
8988 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8989 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8990 		fences[i] = RREG32(base_offset + i * sizeof(u32));
8991 
8992 	/* The actual print */
8993 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
8994 		u32 fence_id;
8995 		u64 fence_cnt, fence_rdata;
8996 		const char *engine_name;
8997 
8998 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
8999 			statuses[i]))
9000 			continue;
9001 
9002 		fence_id =
9003 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9004 		fence_cnt = base_offset + CFG_BASE +
9005 			sizeof(u32) *
9006 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9007 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9008 				sds->props[SP_FENCE0_RDATA_OFFSET];
9009 		engine_name = hl_sync_engine_to_string(engine_type);
9010 
9011 		rc = hl_snprintf_resize(
9012 			buf, size, offset,
9013 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9014 			engine_name, engine_id,
9015 			i, fence_id,
9016 			fence_cnt, engine_name, engine_id, fence_id, i,
9017 			fence_rdata, engine_name, engine_id, fence_id, i,
9018 			fences[fence_id],
9019 			statuses[i]);
9020 		if (rc)
9021 			goto free_fences;
9022 	}
9023 
9024 	rc = 0;
9025 
9026 free_fences:
9027 	kfree(fences);
9028 free_status:
9029 	kfree(statuses);
9030 out:
9031 	return rc;
9032 }
9033 
9034 
9035 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9036 	.monitor_valid = gaudi_monitor_valid,
9037 	.print_single_monitor = gaudi_print_single_monitor,
9038 	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9039 	.print_fences_single_engine = gaudi_print_fences_single_engine,
9040 };
9041 
9042 static void gaudi_state_dump_init(struct hl_device *hdev)
9043 {
9044 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9045 	int i;
9046 
9047 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9048 		hash_add(sds->so_id_to_str_tb,
9049 			&gaudi_so_id_to_str[i].node,
9050 			gaudi_so_id_to_str[i].id);
9051 
9052 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9053 		hash_add(sds->monitor_id_to_str_tb,
9054 			&gaudi_monitor_id_to_str[i].node,
9055 			gaudi_monitor_id_to_str[i].id);
9056 
9057 	sds->props = gaudi_state_dump_specs_props;
9058 
9059 	sds->sync_namager_names = gaudi_sync_manager_names;
9060 
9061 	sds->funcs = gaudi_state_dump_funcs;
9062 }
9063 
9064 static u32 *gaudi_get_stream_master_qid_arr(void)
9065 {
9066 	return gaudi_stream_master;
9067 }
9068 
/* Nothing is configured here; always returns 0. */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9073 
/* Nothing is configured here; always returns 0. */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9078 
/* Intentionally empty: no check is performed here. */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9082 
9083 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9084 {
9085 	struct hl_device *hdev = dev_get_drvdata(dev);
9086 	struct cpucp_info *cpucp_info;
9087 
9088 	cpucp_info = &hdev->asic_prop.cpucp_info;
9089 
9090 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9091 }
9092 
9093 static DEVICE_ATTR_RO(infineon_ver);
9094 
9095 static struct attribute *gaudi_vrm_dev_attrs[] = {
9096 	&dev_attr_infineon_ver.attr,
9097 	NULL,
9098 };
9099 
9100 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9101 					struct attribute_group *dev_vrm_attr_grp)
9102 {
9103 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9104 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9105 }
9106 
9107 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9108 {
9109 	return 0;
9110 }
9111 
9112 static const struct hl_asic_funcs gaudi_funcs = {
9113 	.early_init = gaudi_early_init,
9114 	.early_fini = gaudi_early_fini,
9115 	.late_init = gaudi_late_init,
9116 	.late_fini = gaudi_late_fini,
9117 	.sw_init = gaudi_sw_init,
9118 	.sw_fini = gaudi_sw_fini,
9119 	.hw_init = gaudi_hw_init,
9120 	.hw_fini = gaudi_hw_fini,
9121 	.halt_engines = gaudi_halt_engines,
9122 	.suspend = gaudi_suspend,
9123 	.resume = gaudi_resume,
9124 	.mmap = gaudi_mmap,
9125 	.ring_doorbell = gaudi_ring_doorbell,
9126 	.pqe_write = gaudi_pqe_write,
9127 	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9128 	.asic_dma_free_coherent = gaudi_dma_free_coherent,
9129 	.scrub_device_mem = gaudi_scrub_device_mem,
9130 	.scrub_device_dram = gaudi_scrub_device_dram,
9131 	.get_int_queue_base = gaudi_get_int_queue_base,
9132 	.test_queues = gaudi_test_queues,
9133 	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9134 	.asic_dma_pool_free = gaudi_dma_pool_free,
9135 	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9136 	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9137 	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9138 	.cs_parser = gaudi_cs_parser,
9139 	.dma_map_sgtable = hl_asic_dma_map_sgtable,
9140 	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9141 	.update_eq_ci = gaudi_update_eq_ci,
9142 	.context_switch = gaudi_context_switch,
9143 	.restore_phase_topology = gaudi_restore_phase_topology,
9144 	.debugfs_read_dma = gaudi_debugfs_read_dma,
9145 	.add_device_attr = gaudi_add_device_attr,
9146 	.handle_eqe = gaudi_handle_eqe,
9147 	.get_events_stat = gaudi_get_events_stat,
9148 	.read_pte = gaudi_read_pte,
9149 	.write_pte = gaudi_write_pte,
9150 	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9151 	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9152 	.mmu_prefetch_cache_range = NULL,
9153 	.send_heartbeat = gaudi_send_heartbeat,
9154 	.debug_coresight = gaudi_debug_coresight,
9155 	.is_device_idle = gaudi_is_device_idle,
9156 	.compute_reset_late_init = gaudi_compute_reset_late_init,
9157 	.hw_queues_lock = gaudi_hw_queues_lock,
9158 	.hw_queues_unlock = gaudi_hw_queues_unlock,
9159 	.get_pci_id = gaudi_get_pci_id,
9160 	.get_eeprom_data = gaudi_get_eeprom_data,
9161 	.get_monitor_dump = gaudi_get_monitor_dump,
9162 	.send_cpu_message = gaudi_send_cpu_message,
9163 	.pci_bars_map = gaudi_pci_bars_map,
9164 	.init_iatu = gaudi_init_iatu,
9165 	.rreg = hl_rreg,
9166 	.wreg = hl_wreg,
9167 	.halt_coresight = gaudi_halt_coresight,
9168 	.ctx_init = gaudi_ctx_init,
9169 	.ctx_fini = gaudi_ctx_fini,
9170 	.pre_schedule_cs = gaudi_pre_schedule_cs,
9171 	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9172 	.load_firmware_to_device = gaudi_load_firmware_to_device,
9173 	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9174 	.get_signal_cb_size = gaudi_get_signal_cb_size,
9175 	.get_wait_cb_size = gaudi_get_wait_cb_size,
9176 	.gen_signal_cb = gaudi_gen_signal_cb,
9177 	.gen_wait_cb = gaudi_gen_wait_cb,
9178 	.reset_sob = gaudi_reset_sob,
9179 	.reset_sob_group = gaudi_reset_sob_group,
9180 	.get_device_time = gaudi_get_device_time,
9181 	.pb_print_security_errors = NULL,
9182 	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
9183 	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9184 	.get_dec_base_addr = NULL,
9185 	.scramble_addr = hl_mmu_scramble_addr,
9186 	.descramble_addr = hl_mmu_descramble_addr,
9187 	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9188 	.get_hw_block_id = gaudi_get_hw_block_id,
9189 	.hw_block_mmap = gaudi_block_mmap,
9190 	.enable_events_from_fw = gaudi_enable_events_from_fw,
9191 	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9192 	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9193 	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
9194 	.init_firmware_loader = gaudi_init_firmware_loader,
9195 	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9196 	.state_dump_init = gaudi_state_dump_init,
9197 	.get_sob_addr = gaudi_get_sob_addr,
9198 	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
9199 	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9200 	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
9201 	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
9202 	.access_dev_mem = hl_access_dev_mem,
9203 	.set_dram_bar_base = gaudi_set_hbm_bar_base,
9204 	.send_device_activity = gaudi_send_device_activity,
9205 	.set_dram_properties = gaudi_set_dram_properties,
9206 	.set_binning_masks = gaudi_set_binning_masks,
9207 };
9208 
9209 /**
9210  * gaudi_set_asic_funcs - set GAUDI function pointers
9211  *
9212  * @hdev: pointer to hl_device structure
9213  *
9214  */
9215 void gaudi_set_asic_funcs(struct hl_device *hdev)
9216 {
9217 	hdev->asic_funcs = &gaudi_funcs;
9218 }
9219