xref: /linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision 55a42f78ffd386e01a5404419f8c5ded7db70a21)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
 * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69 
70 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
71 
72 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
76 
77 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
86 
87 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
88 
89 #define GAUDI_MAX_STRING_LEN		20
90 
91 #define GAUDI_CB_POOL_CB_CNT		512
92 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
93 
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
95 
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
97 
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
99 
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
101 
102 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
103 
104 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
105 
106 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
107 
108 #define MONITOR_SOB_STRING_SIZE		256
109 
/* Stream-master queues: the four streams of each of the two PCI DMA QMANs */
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};
120 
/* Logical DMA role (PCI/HBM) -> physical DMA engine id mapping */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
131 
/* Completion-queue index -> external (PCI DMA) queue id serviced by that CQ */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
142 
/*
 * Size in bytes of each packet type the CB parser understands; entries not
 * designated here are zero (unknown packets).
 */
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
159 
160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 	switch (id) {
163 	case PACKET_WREG_32:
164 	case PACKET_WREG_BULK:
165 	case PACKET_MSG_LONG:
166 	case PACKET_MSG_SHORT:
167 	case PACKET_CP_DMA:
168 	case PACKET_REPEAT:
169 	case PACKET_MSG_PROT:
170 	case PACKET_FENCE:
171 	case PACKET_LIN_DMA:
172 	case PACKET_NOP:
173 	case PACKET_STOP:
174 	case PACKET_ARB_POINT:
175 	case PACKET_WAIT:
176 	case PACKET_LOAD_AND_EXE:
177 		return true;
178 	default:
179 		return false;
180 	}
181 }
182 
/* Human-readable cause strings, indexed by TPC interrupt-cause bit position */
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};
206 
/* Human-readable cause strings, indexed by QMAN error-cause bit position */
static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};
226 
/* Human-readable cause strings for QMAN arbiter errors, by bit position */
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
233 
/*
 * Queue type per GAUDI_QUEUE_ID_* index. Order must match the queue-id enum:
 * EXT queues (PCI DMA 0/1 streams) are driven from host, the single CPU
 * queue is driver-only, and all remaining DMA/MME/TPC/NIC streams are
 * internal (on-device) queues.
 */
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
349 
/* Sync-object id -> symbolic name, used when dumping sync manager state */
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
379 
/* Monitor id -> symbolic name, used when dumping sync manager state */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
393 
/*
 * State-dump specification: register addresses, offsets and engine counts
 * consumed by the common state-dump code, indexed by SP_* property id.
 */
static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
429 
/*
 * Queue id -> owning engine id. The CPU PQ has no engine and maps to the
 * out-of-range sentinel GAUDI_ENGINE_ID_SIZE.
 */
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};
461 
/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 * NULL-terminated so iterators can stop without knowing the count.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};
472 
/* Parameters for extracting ECC error information from a memory block */
struct ecc_info_extract_params {
	u64 block_address;	/* base address of the block to inspect */
	u32 num_memories;	/* number of memories in the block */
	bool derr;		/* true for double-bit error, false for single-bit */
};
478 
/* Forward declarations for helpers defined later in this file */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 		return HL_COLLECTIVE_MASTER;
502 
503 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 		return HL_COLLECTIVE_SLAVE;
506 
507 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 		return HL_COLLECTIVE_SLAVE;
510 
511 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 		return HL_COLLECTIVE_SLAVE;
514 
515 	return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517 
518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 	struct asic_fixed_properties *prop = &hdev->asic_prop;
521 
522 	if (hdev->card_type == cpucp_card_type_pmc) {
523 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524 
525 		if (prop->fw_security_enabled)
526 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 		else
528 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 	} else {
530 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 	}
533 }
534 
/*
 * gaudi_set_fixed_properties() - fill hdev->asic_prop with the ASIC
 * properties that are fixed at compile time: queue layout, memory map,
 * MMU topology, sync-stream SOB/monitor reservations and misc defaults.
 *
 * Allocates prop->hw_queues_props; freed by the caller on its error paths.
 *
 * Return: 0 on success, -ENOMEM if the queue properties array cannot be
 * allocated.
 */
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	/* Per-queue properties derived from the static gaudi_queue_type map */
	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream are reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	/* Palladium emulation gets a smaller page-table area */
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	/* Host (process) MMU: 5-hop v1.1 layout, 4KB pages, lower half of
	 * the host VA space.
	 */
	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.pgt_size = prop->mmu_pgt_size;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	/* Placeholder until the real card name arrives from CPUCP */
	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	/* First SOB/monitor past the driver-reserved sync-stream ranges */
	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}
709 
710 static int gaudi_pci_bars_map(struct hl_device *hdev)
711 {
712 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
713 	bool is_wc[3] = {false, false, true};
714 	int rc;
715 
716 	rc = hl_pci_bars_map(hdev, name, is_wc);
717 	if (rc)
718 		return rc;
719 
720 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
722 
723 	return 0;
724 }
725 
726 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727 {
728 	struct gaudi_device *gaudi = hdev->asic_specific;
729 	struct hl_inbound_pci_region pci_region;
730 	u64 old_addr = addr;
731 	int rc;
732 
733 	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734 		return old_addr;
735 
736 	if (hdev->asic_prop.iatu_done_by_fw)
737 		return U64_MAX;
738 
739 	/* Inbound Region 2 - Bar 4 - Point to HBM */
740 	pci_region.mode = PCI_BAR_MATCH_MODE;
741 	pci_region.bar = HBM_BAR_ID;
742 	pci_region.addr = addr;
743 	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
744 	if (rc)
745 		return U64_MAX;
746 
747 	if (gaudi) {
748 		old_addr = gaudi->hbm_bar_cur_addr;
749 		gaudi->hbm_bar_cur_addr = addr;
750 	}
751 
752 	return old_addr;
753 }
754 
755 static int gaudi_init_iatu(struct hl_device *hdev)
756 {
757 	struct hl_inbound_pci_region inbound_region;
758 	struct hl_outbound_pci_region outbound_region;
759 	int rc;
760 
761 	if (hdev->asic_prop.iatu_done_by_fw)
762 		return 0;
763 
764 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 	inbound_region.mode = PCI_BAR_MATCH_MODE;
766 	inbound_region.bar = SRAM_BAR_ID;
767 	inbound_region.addr = SRAM_BASE_ADDR;
768 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769 	if (rc)
770 		goto done;
771 
772 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 	inbound_region.mode = PCI_BAR_MATCH_MODE;
774 	inbound_region.bar = CFG_BAR_ID;
775 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
776 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777 	if (rc)
778 		goto done;
779 
780 	/* Inbound Region 2 - Bar 4 - Point to HBM */
781 	inbound_region.mode = PCI_BAR_MATCH_MODE;
782 	inbound_region.bar = HBM_BAR_ID;
783 	inbound_region.addr = DRAM_PHYS_BASE;
784 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785 	if (rc)
786 		goto done;
787 
788 	/* Outbound Region 0 - Point to Host */
789 	outbound_region.addr = HOST_PHYS_BASE;
790 	outbound_region.size = HOST_PHYS_SIZE;
791 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792 
793 done:
794 	return rc;
795 }
796 
/* Read the device H/W state register (written by the FW boot stages) */
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
801 
/*
 * gaudi_early_init - first init stage: set fixed properties, validate PCI
 * BAR sizes, decide who programs the iATU (driver vs. FW), init PCI and
 * verify the device is in a clean H/W state.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes - a mismatch means this is not a Gaudi device */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	/* ELBI is accessible - ask the FW boot status directly */
	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	/* A dirty state means a previous run left the H/W configured - reset
	 * before any further initialization
	 */
	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
893 
/*
 * gaudi_early_fini - undo gaudi_early_init: release the queue properties
 * array and tear down PCI resources.
 */
static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}
901 
902 /**
903  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904  *
905  * @hdev: pointer to hl_device structure
906  *
907  */
908 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909 {
910 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911 	struct asic_fixed_properties *prop = &hdev->asic_prop;
912 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913 	int rc;
914 
915 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
916 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917 		struct gaudi_device *gaudi = hdev->asic_specific;
918 
919 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920 			return 0;
921 
922 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923 
924 		if (rc)
925 			return rc;
926 
927 		freq = pll_freq_arr[2];
928 	} else {
929 		/* Backward compatibility */
930 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932 		nr = RREG32(mmPSOC_CPU_PLL_NR);
933 		nf = RREG32(mmPSOC_CPU_PLL_NF);
934 		od = RREG32(mmPSOC_CPU_PLL_OD);
935 
936 		if (div_sel == DIV_SEL_REF_CLK ||
937 				div_sel == DIV_SEL_DIVIDED_REF) {
938 			if (div_sel == DIV_SEL_REF_CLK)
939 				freq = PLL_REF_CLK;
940 			else
941 				freq = PLL_REF_CLK / (div_fctr + 1);
942 		} else if (div_sel == DIV_SEL_PLL_CLK ||
943 			div_sel == DIV_SEL_DIVIDED_PLL) {
944 			pll_clk = PLL_REF_CLK * (nf + 1) /
945 					((nr + 1) * (od + 1));
946 			if (div_sel == DIV_SEL_PLL_CLK)
947 				freq = pll_clk;
948 			else
949 				freq = pll_clk / (div_fctr + 1);
950 		} else {
951 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952 			freq = 0;
953 		}
954 	}
955 
956 	prop->psoc_timestamp_frequency = freq;
957 	prop->psoc_pci_pll_nr = nr;
958 	prop->psoc_pci_pll_nf = nf;
959 	prop->psoc_pci_pll_od = od;
960 	prop->psoc_pci_pll_div_factor = div_fctr;
961 
962 	return 0;
963 }
964 
/*
 * _gaudi_init_tpc_mem - DMA the TPC kernel into SRAM via QMAN0 and run it on
 * every TPC engine.
 *
 * @hdev: pointer to hl_device structure
 * @tpc_kernel_src_addr: host DMA address of the staged TPC kernel image
 * @tpc_kernel_size: size of the image in bytes
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	/* Build a single LIN_DMA packet that copies the kernel to SRAM */
	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	/* Hold the CB while it is in flight on QMAN0 */
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	/* Image is now in SRAM - execute it on each TPC to init its memory */
	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
1041 
1042 /*
1043  * gaudi_init_tpc_mem() - Initialize TPC memories.
1044  * @hdev: Pointer to hl_device structure.
1045  *
1046  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047  *
1048  * Return: 0 for success, negative value for error.
1049  */
1050 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051 {
1052 	const struct firmware *fw;
1053 	size_t fw_size;
1054 	void *cpu_addr;
1055 	dma_addr_t dma_handle;
1056 	int rc, count = 5;
1057 
1058 again:
1059 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060 	if (rc == -EINTR && count-- > 0) {
1061 		msleep(50);
1062 		goto again;
1063 	}
1064 
1065 	if (rc) {
1066 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067 				GAUDI_TPC_FW_FILE);
1068 		goto out;
1069 	}
1070 
1071 	fw_size = fw->size;
1072 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073 	if (!cpu_addr) {
1074 		dev_err(hdev->dev,
1075 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1076 			fw_size);
1077 		rc = -ENOMEM;
1078 		goto out;
1079 	}
1080 
1081 	memcpy(cpu_addr, fw->data, fw_size);
1082 
1083 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084 
1085 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086 
1087 out:
1088 	release_firmware(fw);
1089 	return rc;
1090 }
1091 
1092 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093 {
1094 	struct gaudi_device *gaudi = hdev->asic_specific;
1095 	struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096 	struct hl_hw_queue *q;
1097 	u32 i, sob_id, sob_group_id, queue_id;
1098 
1099 	/* Iterate through SOB groups and assign a SOB for each slave queue */
1100 	sob_group_id =
1101 		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102 	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103 
1104 	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106 		q = &hdev->kernel_queues[queue_id + (4 * i)];
1107 		q->sync_stream_prop.collective_sob_id = sob_id + i;
1108 	}
1109 
1110 	/* Both DMA5 and TPC7 use the same resources since only a single
1111 	 * engine need to participate in the reduction process
1112 	 */
1113 	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114 	q = &hdev->kernel_queues[queue_id];
1115 	q->sync_stream_prop.collective_sob_id =
1116 			sob_id + NIC_NUMBER_OF_ENGINES;
1117 
1118 	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119 	q = &hdev->kernel_queues[queue_id];
1120 	q->sync_stream_prop.collective_sob_id =
1121 			sob_id + NIC_NUMBER_OF_ENGINES;
1122 }
1123 
1124 static void gaudi_sob_group_hw_reset(struct kref *ref)
1125 {
1126 	struct gaudi_hw_sob_group *hw_sob_group =
1127 		container_of(ref, struct gaudi_hw_sob_group, kref);
1128 	struct hl_device *hdev = hw_sob_group->hdev;
1129 	int i;
1130 
1131 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134 
1135 	kref_init(&hw_sob_group->kref);
1136 }
1137 
/*
 * gaudi_sob_group_reset_error - kref release callback used where the
 * refcount is never expected to reach zero; reaching here indicates a
 * refcount imbalance, so log it as critical.
 */
static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}
1148 
/*
 * gaudi_collective_mstr_sob_mask_set - build the master monitor SOB masks:
 * one bit per enabled NIC engine plus one bit for the reduction engine.
 * The mask is split across words of HL_MAX_SOBS_PER_MONITOR bits each.
 */
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit. Note that after the loop above,
	 * i == NIC_NUMBER_OF_ENGINES, i.e. the slot right after the last
	 * NIC, which is the one assigned to DMA5/TPC7 in
	 * gaudi_collective_map_sobs().
	 */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}
1166 
1167 static int gaudi_collective_init(struct hl_device *hdev)
1168 {
1169 	u32 i, sob_id, reserved_sobs_per_group;
1170 	struct gaudi_collective_properties *prop;
1171 	struct gaudi_device *gaudi;
1172 
1173 	gaudi = hdev->asic_specific;
1174 	prop = &gaudi->collective_props;
1175 	sob_id = hdev->asic_prop.collective_first_sob;
1176 
1177 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 	reserved_sobs_per_group =
1179 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180 
1181 	/* Init SOB groups */
1182 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183 		prop->hw_sob_group[i].hdev = hdev;
1184 		prop->hw_sob_group[i].base_sob_id = sob_id;
1185 		sob_id += reserved_sobs_per_group;
1186 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187 	}
1188 
1189 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1190 		prop->next_sob_group_val[i] = 1;
1191 		prop->curr_sob_group_idx[i] = 0;
1192 		gaudi_collective_map_sobs(hdev, i);
1193 	}
1194 
1195 	gaudi_collective_mstr_sob_mask_set(gaudi);
1196 
1197 	return 0;
1198 }
1199 
/*
 * gaudi_reset_sob_group - drop a reference to the SOB group; when the last
 * reference is released, gaudi_sob_group_hw_reset() zeroes the group in H/W.
 */
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}
1208 
/*
 * gaudi_collective_master_init_job - fill the master queue's patched CB with
 * two wait sequences: one monitor for the first HL_MAX_SOBS_PER_MONITOR
 * SOBs of the group, a second monitor for the remaining SOBs.
 */
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	/* Remember which queue owns the group, for later reset handling */
	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	/* First wait: monitor 0 over the first word of the SOB mask */
	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	/* Second wait: monitor 1 over the second word of the SOB mask.
	 * Note wait_prop.data/sob_val/q_idx deliberately keep the values
	 * set for the first wait above.
	 */
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}
1259 
/*
 * gaudi_collective_slave_init_job - fill a slave queue's CB with a wait on
 * the user's signal SOB followed by a signal of the slave's collective SOB
 * (which the master queue waits on).
 */
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle store earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	/* Signal the collective SOB so the master monitor can fire */
	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}
1307 
/*
 * gaudi_collective_wait_init_cs - initialize a collective-wait CS: bind it
 * to the signal CS's SOB, generate the master/slave CBs for all jobs and
 * manage the per-stream SOB group refcount and wraparound.
 *
 * Return: 0 on success, -EINVAL if the signal CS already completed.
 */
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depends on each
		 * job wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	/* Generate the wait/signal CBs for every job in the CS */
	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	/* Ensure all CB writes are visible before releasing the signal fence */
	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
1414 
1415 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416 {
1417 	u32 cacheline_end, additional_commands;
1418 
1419 	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420 	additional_commands = sizeof(struct packet_msg_prot) * 2;
1421 
1422 	if (user_cb_size + additional_commands > cacheline_end)
1423 		return cacheline_end - user_cb_size + additional_commands;
1424 	else
1425 		return additional_commands;
1426 }
1427 
/*
 * gaudi_collective_wait_create_job - allocate one job (master or slave) of a
 * collective-wait CS, along with the kernel CB it will use, and append it to
 * the CS job list.
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on allocation failure.
 */
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	/* Hold the CB for the lifetime of the job */
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since its guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
1523 
/*
 * gaudi_collective_wait_create_jobs - create all jobs of a collective-wait
 * CS: one master job on the wait queue, then slave jobs on every enabled
 * NIC queue and on the reduction engine (DMA5 or TPC7) queue.
 *
 * Return: 0 on success, negative errno on validation or allocation failure.
 */
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	/* Queues of the same engine are 4 entries apart, one per stream */
	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	/* One slave job per SOB in the group plus the single master job */
	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				/* Disabled NICs are skipped, but their queue
				 * slot is still consumed
				 */
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				/* Last slave job runs on the reduction engine */
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}
1608 
/*
 * gaudi_late_init - late init stage, after the device CPU is up: fetch
 * cpucp info, disable unused NIC QMANs on PCI cards, enable FW PCI access,
 * scrub memory, and initialize TPC memories and collective resources.
 *
 * Return: 0 on success, negative errno otherwise. On failure after PCI
 * access was enabled, it is disabled again before returning.
 */
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	/* Let the FW know the host may now access the device over PCI */
	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc)
		return rc;

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}
1688 
/* Late teardown counterpart of gaudi_late_init: release hwmon resources */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1693 
/*
 * gaudi_alloc_cpu_accessible_dma_mem - allocate the host memory region the
 * device CPU can access, retrying until the whole range shares the same
 * address MSBs (a H/W requirement - see comment below).
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on failure.
 */
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */

	/* Keep unsuitable allocations alive in the array so a retry cannot
	 * get the same range again; all are freed at the end.
	 */
	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	/* With FW security the FW programs the MSB extension itself */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	/* Free all rejected ranges; entry i (the good one) is kept */
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
1746 
1747 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1748 {
1749 	struct gaudi_device *gaudi = hdev->asic_specific;
1750 	struct gaudi_internal_qman_info *q;
1751 	u32 i;
1752 
1753 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1754 		q = &gaudi->internal_qmans[i];
1755 		if (!q->pq_kernel_addr)
1756 			continue;
1757 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1758 	}
1759 }
1760 
1761 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1762 {
1763 	struct gaudi_device *gaudi = hdev->asic_specific;
1764 	struct gaudi_internal_qman_info *q;
1765 	int rc, i;
1766 
1767 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1768 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1769 			continue;
1770 
1771 		q = &gaudi->internal_qmans[i];
1772 
1773 		switch (i) {
1774 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1775 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1776 			break;
1777 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1778 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1779 			break;
1780 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1781 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1782 			break;
1783 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1784 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1785 			break;
1786 		default:
1787 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1788 			rc = -EINVAL;
1789 			goto free_internal_qmans_pq_mem;
1790 		}
1791 
1792 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1793 								GFP_KERNEL | __GFP_ZERO);
1794 		if (!q->pq_kernel_addr) {
1795 			rc = -ENOMEM;
1796 			goto free_internal_qmans_pq_mem;
1797 		}
1798 	}
1799 
1800 	return 0;
1801 
1802 free_internal_qmans_pq_mem:
1803 	gaudi_free_internal_qmans_pq_mem(hdev);
1804 	return rc;
1805 }
1806 
1807 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1808 {
1809 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1810 	struct pci_mem_region *region;
1811 
1812 	/* CFG */
1813 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1814 	region->region_base = CFG_BASE;
1815 	region->region_size = CFG_SIZE;
1816 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1817 	region->bar_size = CFG_BAR_SIZE;
1818 	region->bar_id = CFG_BAR_ID;
1819 	region->used = 1;
1820 
1821 	/* SRAM */
1822 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1823 	region->region_base = SRAM_BASE_ADDR;
1824 	region->region_size = SRAM_SIZE;
1825 	region->offset_in_bar = 0;
1826 	region->bar_size = SRAM_BAR_SIZE;
1827 	region->bar_id = SRAM_BAR_ID;
1828 	region->used = 1;
1829 
1830 	/* DRAM */
1831 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1832 	region->region_base = DRAM_PHYS_BASE;
1833 	region->region_size = hdev->asic_prop.dram_size;
1834 	region->offset_in_bar = 0;
1835 	region->bar_size = prop->dram_pci_bar_size;
1836 	region->bar_id = HBM_BAR_ID;
1837 	region->used = 1;
1838 
1839 	/* SP SRAM */
1840 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1841 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1842 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1843 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1844 	region->bar_size = CFG_BAR_SIZE;
1845 	region->bar_id = CFG_BAR_ID;
1846 	region->used = 1;
1847 }
1848 
/**
 * gaudi_sw_init() - one-time software-only initialization of the Gaudi device.
 * @hdev: habanalabs device structure.
 *
 * Allocates the ASIC-specific structure, builds the event-id table from the
 * IRQ map, and creates the DMA pools and internal queue memory. On failure,
 * resources are released in reverse order via the goto-unwind chain below.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
	if (!gaudi)
		return -ENOMEM;

	/* Build the compact events[] table from all valid entries of the
	 * static IRQ map; bail out if the map holds more valid entries than
	 * the table can take.
	 */
	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	/* Sub-allocator on top of the CPU-accessible DMA memory chunk,
	 * with 32-byte minimum allocation granularity.
	 */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	/* Undo the CPU-address MSB manipulation before freeing, so the DMA
	 * API gets back the original bus address it handed out.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
1942 
/**
 * gaudi_sw_fini() - tear down everything gaudi_sw_init() created.
 * @hdev: habanalabs device structure.
 *
 * Releases resources in reverse order of their creation in gaudi_sw_init().
 *
 * Return: always 0.
 */
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	/* Restore the original bus address (undo the device-CPU MSB
	 * manipulation) before handing it back to the DMA API.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}
1964 
1965 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1966 {
1967 	struct hl_device *hdev = arg;
1968 	int i;
1969 
1970 	if (hdev->disabled)
1971 		return IRQ_HANDLED;
1972 
1973 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1974 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1975 
1976 	hl_irq_handler_eq(irq, &hdev->event_queue);
1977 
1978 	return IRQ_HANDLED;
1979 }
1980 
1981 /*
1982  * For backward compatibility, new MSI interrupts should be set after the
1983  * existing CPU and NIC interrupts.
1984  */
1985 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1986 				bool cpu_eq)
1987 {
1988 	int msi_vec;
1989 
1990 	if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1991 		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1992 				GAUDI_EVENT_QUEUE_MSI_IDX);
1993 
1994 	msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1995 			(nr + NIC_NUMBER_OF_ENGINES + 1);
1996 
1997 	return pci_irq_vector(hdev->pdev, msi_vec);
1998 }
1999 
2000 static int gaudi_enable_msi_single(struct hl_device *hdev)
2001 {
2002 	int rc, irq;
2003 
2004 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2005 
2006 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2007 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2008 			"gaudi single msi", hdev);
2009 	if (rc)
2010 		dev_err(hdev->dev,
2011 			"Failed to request single MSI IRQ\n");
2012 
2013 	return rc;
2014 }
2015 
2016 static int gaudi_enable_msi(struct hl_device *hdev)
2017 {
2018 	struct gaudi_device *gaudi = hdev->asic_specific;
2019 	int rc;
2020 
2021 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2022 		return 0;
2023 
2024 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2025 	if (rc < 0) {
2026 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2027 		return rc;
2028 	}
2029 
2030 	rc = gaudi_enable_msi_single(hdev);
2031 	if (rc)
2032 		goto free_pci_irq_vectors;
2033 
2034 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2035 
2036 	return 0;
2037 
2038 free_pci_irq_vectors:
2039 	pci_free_irq_vectors(hdev->pdev);
2040 	return rc;
2041 }
2042 
2043 static void gaudi_sync_irqs(struct hl_device *hdev)
2044 {
2045 	struct gaudi_device *gaudi = hdev->asic_specific;
2046 
2047 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2048 		return;
2049 
2050 	/* Wait for all pending IRQs to be finished */
2051 	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2052 }
2053 
2054 static void gaudi_disable_msi(struct hl_device *hdev)
2055 {
2056 	struct gaudi_device *gaudi = hdev->asic_specific;
2057 
2058 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2059 		return;
2060 
2061 	gaudi_sync_irqs(hdev);
2062 	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2063 	pci_free_irq_vectors(hdev->pdev);
2064 
2065 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2066 }
2067 
/**
 * gaudi_init_scrambler_sram() - enable the SRAM scrambler in all routers.
 * @hdev: habanalabs device structure.
 *
 * Writes the SRAM-scrambler enable bit in every NIF/SIF router and every
 * DMA_IF down-channel. Skipped when the F/W is secured (the driver then
 * cannot touch these registers), when the F/W application status bit
 * indicates SRAM scrambling is already handled by F/W, or when this driver
 * already did it (HW_CAP_SRAM_SCRAMBLER set).
 */
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W reports it already enabled SRAM scrambling */
	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	/* Already done once by this driver */
	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	/* North-interface routers */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* South-interface routers */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* DMA interface down-channels (E/W, N/S, CH0/CH1) */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
2135 
/**
 * gaudi_init_scrambler_hbm() - enable the HBM scrambler in all routers.
 * @hdev: habanalabs device structure.
 *
 * Mirror of gaudi_init_scrambler_sram() for the HBM path: writes the
 * HBM-scrambler enable bit in every NIF/SIF router and DMA_IF down-channel.
 * Skipped when the F/W is secured, when the boot-fit status bit indicates
 * DRAM scrambling is already handled by F/W, or when this driver already
 * did it (HW_CAP_HBM_SCRAMBLER set).
 */
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W boot-fit reports it already enabled DRAM scrambling */
	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	/* Already done once by this driver */
	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	/* North-interface routers */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* South-interface routers */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* DMA interface down-channels (E/W, N/S, CH0/CH1) */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
2203 
/**
 * gaudi_init_e2e() - program end-to-end credits in all routers / DMA IFs.
 * @hdev: habanalabs device structure.
 *
 * First programs the per-router HBM/PCI read/write credit sizes, then sets
 * the E2E enable bits. The HBM WR/RD sizes are written pre-shifted by 3
 * (register granularity — presumably 8-byte units; the raw tuned values are
 * kept in the source for readability). The per-router values are
 * hardware-tuning constants; do not "simplify" them.
 *
 * Skipped when the F/W is secured or when the boot-fit status bit indicates
 * the F/W already configured the E2E credits.
 */
static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W boot-fit reports it already configured E2E credits */
	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	/* South-interface routers: per-router credit sizes */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	/* North-interface routers: per-router credit sizes */
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	/* DMA interface down-channels: identical credit sizes everywhere */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	/* Enable E2E credits only after all sizes are programmed */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
2453 
/**
 * gaudi_init_hbm_cred() - program HBM read/write credit counters.
 * @hdev: habanalabs device structure.
 *
 * Writes the per-DMA-IF HBM0/HBM1 read/write credit-count patterns and then
 * sets the read+write credit enable bits for both credit sets (EN_0/EN_1).
 * The credit patterns are hardware-tuning constants.
 *
 * Skipped when the F/W is secured or when the boot-fit status bit indicates
 * the F/W already configured the HBM credits.
 */
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	/* F/W boot-fit reports it already configured HBM credits */
	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	/* Same pattern set for all four DMA interfaces (E/W x N/S) */
	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	/* Enable read and write credits, credit set 0 */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	/* Enable read and write credits, credit set 1 */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
2516 
/**
 * gaudi_init_golden_registers() - program the "golden" register values.
 * @hdev: habanalabs device structure.
 *
 * Applies the E2E credit and HBM credit configurations, masks TPC
 * arithmetic interrupts, configures the TPC instruction-cache fetch size,
 * zeroes the first 128 bytes of SRAM (Tensor DMA requirement), and programs
 * the MME EUS rollup count.
 */
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
				tpc_id < TPC_NUMBER_OF_ENGINES;
				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	/* Same rollup-count value for all four MME controllers */
	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}
2544 
/**
 * gaudi_init_pci_dma_qman() - configure one stream of a PCI DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the PCI DMA channel (selects the QMAN register block).
 * @qman_id: stream index within the QMAN (0-3); stream 0 also triggers the
 *           per-QMAN one-time configuration below.
 * @qman_pq_addr: bus address of the host-resident PQ buffer for this stream.
 *
 * Programs the PQ base/size/pointers, the LDMA offsets, the four sync
 * manager message base addresses (E_N and W_S monitor-payload and SOB
 * bases), and - once per QMAN - the RAZWI error IRQ routing, arbitration
 * error reporting, watchdog timeout and protection bits.
 */
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
					int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* Split addresses of the E_N and W_S sync manager monitor-payload
	 * and sync-object bases into the 32-bit halves the QMAN expects.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream register stride is 4 bytes */
	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	/* PQ size register takes log2 of the queue length */
	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		/* Error IRQ target depends on whether the GIC is driven
		 * directly or through the F/W dynamic registers.
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies which DMA QMAN raised the event */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
2635 
/*
 * gaudi_init_dma_core() - configure a single DMA core engine.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA core; selects the register bank via
 *          DMA_CORE_OFFSET.
 *
 * Programs outstanding-transaction limits, RAZWI error reporting (message
 * address/payload towards the interrupt handler) and finally enables the
 * core. The enable write is intentionally last.
 */
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	/*
	 * Error messages go either straight to the GIC or to the address
	 * the F/W published in the dynamic regs, depending on whether the
	 * F/W enabled GIC interrupts.
	 */
	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	/* Error payload carries the event id so the handler can tell which
	 * DMA core raised it.
	 */
	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	/* Enable the core only after everything above is configured */
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
2675 
2676 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677 				u32 enable_mask)
2678 {
2679 	u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2680 
2681 	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2682 }
2683 
/*
 * gaudi_init_pci_dma_qmans() - initialize the QMANs of all PCI DMA channels.
 * @hdev: habanalabs device structure.
 *
 * For every PCI DMA channel: assigns a completion queue and an MSI vector
 * to each of its QMAN_STREAMS streams, initializes the stream's QMAN, then
 * the DMA core, and finally enables the QMAN. Guarded by HW_CAP_PCI_DMA
 * so the sequence runs only once.
 */
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		/* Translate logical channel to physical DMA engine id */
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q need to add 1 to get the correct
		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
		 * order to get the correct MSI register.
		 */
		if (dma_id > 1) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/* 4 kernel queues per DMA engine, one per stream */
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
2724 
/*
 * gaudi_init_hbm_dma_qman() - initialize one stream of an HBM DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: physical DMA engine id; selects the register bank via
 *          DMA_QMAN_OFFSET.
 * @qman_id: stream index. 0-3 are upper CPs fed by internal PQs; 4 is the
 *           lower CP, which has no PQ and gets the RAZWI/error config.
 * @qman_base_addr: DMA address of the internal PQ for streams 0-3;
 *                  unused (0) for the lower CP.
 */
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* East-north (en) and west-south (ws) sync manager monitor and
	 * sync-object bases, split to 32-bit halves for MSG_BASE registers.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are laid out 4 bytes apart */
	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP: point the PQ at the internal queue buffer */
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: no PQ; configure error/RAZWI reporting instead */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload encodes the event id of the failing engine */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/*
	 * Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective.
	 * NOTE(review): the caller already passes a physical engine id taken
	 * from gaudi_dma_assignment[], so indexing the table a second time
	 * here looks suspicious - confirm which engine is really intended.
	 */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
2829 
/*
 * gaudi_init_hbm_dma_qmans() - initialize the QMANs of all HBM DMA channels.
 * @hdev: habanalabs device structure.
 *
 * For each HBM DMA channel: initializes its four upper-CP streams with
 * their internal PQ addresses, then the lower CP (stream 4, no PQ), the
 * DMA core, and finally enables the QMAN. Guarded by HW_CAP_HBM_DMA.
 */
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		/* Translate logical HBM channel to physical engine id */
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			 /*
			  * Add the CPU queue in order to get the correct queue
			  * number as all internal queue are placed after it
			  */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
2866 
/*
 * gaudi_init_mme_qman() - initialize one stream of an MME QMAN.
 * @hdev: habanalabs device structure.
 * @mme_offset: register-bank offset of the target MME QMAN relative to
 *              MME0 (0 for MME0, MME2 delta for MME2).
 * @qman_id: stream index. 0-3 are upper CPs fed by internal PQs; 4 is the
 *           lower CP, which gets the RAZWI/error configuration instead.
 * @qman_base_addr: DMA address of the internal PQ for streams 0-3;
 *                  unused (0) for the lower CP.
 */
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	/* East-north sync manager monitor and sync-object bases */
	mtr_base_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are laid out 4 bytes apart */
	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP: point the PQ at the internal queue buffer */
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: no PQ; configure error/RAZWI reporting instead */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/*
		 * Configure RAZWI IRQ. mme_offset is 0 or the MME2 bank
		 * delta, so this maps the offset to an index of 0 or 1
		 * for the error payload below.
		 */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload encodes the event id of the failing MME */
		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}
2953 
/*
 * gaudi_init_mme_qmans() - initialize both MME QMANs (MME2 and MME0).
 * @hdev: habanalabs device structure.
 *
 * Initializes the 4 upper-CP streams of each MME QMAN, then the lower CPs,
 * and enables both QMANs. Guarded by HW_CAP_MME so it runs only once.
 */
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		/* First 4 streams belong to MME2; switch to the MME0 bank */
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
2992 
/*
 * gaudi_init_tpc_qman() - initialize one stream of a TPC QMAN.
 * @hdev: habanalabs device structure.
 * @tpc_offset: register-bank offset of the target TPC relative to TPC0.
 * @qman_id: stream index. 0-3 are upper CPs fed by internal PQs; 4 is the
 *           lower CP, which gets the RAZWI/error configuration instead.
 * @qman_base_addr: DMA address of the internal PQ for streams 0-3;
 *                  unused (0) for the lower CP.
 */
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	/* East-north (en) and west-south (ws) sync manager monitor and
	 * sync-object bases, split to 32-bit halves for MSG_BASE registers.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are laid out 4 bytes apart */
	q_off = tpc_offset + qman_id * 4;

	/* Recover the TPC index from its bank offset */
	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		/* Upper CP: point the PQ at the internal queue buffer */
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: no PQ; configure error/RAZWI reporting instead */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload encodes the event id of the failing TPC */
		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/*
	 * Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective.
	 * NOTE(review): the comment refers to TPC7 while the condition
	 * checks tpc_id == 6 - verify the intended engine index.
	 */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
3098 
/*
 * gaudi_init_tpc_qmans() - initialize the QMANs of all TPC engines.
 * @hdev: habanalabs device structure.
 *
 * For each TPC: initializes the 4 upper-CP streams with their internal PQ
 * addresses, the lower CP, enables the QMAN/TPC channel, and programs the
 * sync manager base in the TPC config space. A per-TPC bit is set in
 * hw_cap_initialized as each engine completes.
 */
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		/* Publish the sync manager SOB base to the TPC config */
		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
3143 
/*
 * gaudi_init_nic_qman() - initialize one stream of a NIC QMAN.
 * @hdev: habanalabs device structure.
 * @nic_offset: register-bank offset of the target NIC QMAN relative to
 *              NIC0 QM0.
 * @qman_id: stream index (0-3); all NIC streams have internal PQs.
 * @qman_base_addr: DMA address of the stream's internal PQ.
 * @nic_id: NIC engine index, used for the error payload.
 *
 * Unlike the DMA/MME/TPC QMANs, the RAZWI/error configuration here is done
 * once per QMAN, on stream 0.
 */
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	/*
	 * Sync manager bases. The low halves mask CFG_BASE to 32 bits
	 * because the NIC QMANs address the sync manager over LBW.
	 */
	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are laid out 4 bytes apart */
	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	/* Per-QMAN (not per-stream) configuration, done once on stream 0 */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload encodes the event id of the failing NIC */
		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}
3231 
/*
 * gaudi_init_nic_qmans() - initialize the QMANs of all enabled NIC ports.
 * @hdev: habanalabs device structure.
 *
 * Walks all NIC engines, skipping ports that are masked out in
 * hdev->nic_ports_mask, initializes the 4 streams of each enabled port's
 * QMAN and enables it. A per-NIC bit is set in hw_cap_initialized.
 *
 * Offset bookkeeping: each NIC macro hosts two QMANs (QM0/QM1). After an
 * odd-numbered port the two QMAN strides are rewound and the offset jumps
 * to the next NIC macro.
 */
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			/* Port disabled - still advance the register offset */
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
3283 
3284 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3285 {
3286 	struct gaudi_device *gaudi = hdev->asic_specific;
3287 
3288 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3289 		return;
3290 
3291 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3292 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3293 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3294 }
3295 
3296 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3297 {
3298 	struct gaudi_device *gaudi = hdev->asic_specific;
3299 
3300 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3301 		return;
3302 
3303 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3304 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3305 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3306 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3307 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3308 }
3309 
3310 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3311 {
3312 	struct gaudi_device *gaudi = hdev->asic_specific;
3313 
3314 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3315 		return;
3316 
3317 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3318 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3319 }
3320 
3321 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3322 {
3323 	struct gaudi_device *gaudi = hdev->asic_specific;
3324 	u32 tpc_offset = 0;
3325 	int tpc_id;
3326 
3327 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3328 		return;
3329 
3330 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3331 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3332 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3333 	}
3334 }
3335 
/*
 * gaudi_disable_nic_qmans() - clear GLBL_CFG0 of every NIC QMAN whose
 * engine was initialized, disabling it.
 * @hdev: habanalabs device structure.
 *
 * Offset bookkeeping matches gaudi_init_nic_qmans(): each NIC macro hosts
 * two QMANs, so after an odd-numbered engine the two QMAN strides are
 * rewound and the offset jumps to the next NIC macro.
 */
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		/* Touch only engines that were actually brought up */
		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}
3359 
3360 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3361 {
3362 	struct gaudi_device *gaudi = hdev->asic_specific;
3363 
3364 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3365 		return;
3366 
3367 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3368 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371 }
3372 
3373 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3374 {
3375 	struct gaudi_device *gaudi = hdev->asic_specific;
3376 
3377 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3378 		return;
3379 
3380 	/* Stop CPs of HBM DMA QMANs */
3381 
3382 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 }
3388 
3389 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3390 {
3391 	struct gaudi_device *gaudi = hdev->asic_specific;
3392 
3393 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3394 		return;
3395 
3396 	/* Stop CPs of MME QMANs */
3397 	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398 	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3399 }
3400 
3401 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3402 {
3403 	struct gaudi_device *gaudi = hdev->asic_specific;
3404 
3405 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3406 		return;
3407 
3408 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 }
3417 
3418 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3419 {
3420 	struct gaudi_device *gaudi = hdev->asic_specific;
3421 
3422 	/* Stop upper CPs of QMANs */
3423 
3424 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3425 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3426 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3427 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3428 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3429 
3430 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3431 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3432 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3433 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3434 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3435 
3436 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3437 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3438 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3439 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3440 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3441 
3442 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3443 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3444 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3445 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3446 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3447 
3448 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3449 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3450 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3451 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3452 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3453 
3454 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3455 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3456 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3457 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3458 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3459 
3460 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3461 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3462 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3463 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3464 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3465 
3466 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3467 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3468 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3469 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3470 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3471 
3472 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3473 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3474 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3475 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3476 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3477 
3478 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3479 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3480 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3481 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3482 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3483 }
3484 
3485 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3486 {
3487 	struct gaudi_device *gaudi = hdev->asic_specific;
3488 
3489 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3490 		return;
3491 
3492 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495 }
3496 
3497 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3498 {
3499 	struct gaudi_device *gaudi = hdev->asic_specific;
3500 
3501 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3502 		return;
3503 
3504 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 }
3510 
3511 static void gaudi_mme_stall(struct hl_device *hdev)
3512 {
3513 	struct gaudi_device *gaudi = hdev->asic_specific;
3514 
3515 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3516 		return;
3517 
3518 	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
3519 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520 	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3521 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522 	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3523 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535 }
3536 
3537 static void gaudi_tpc_stall(struct hl_device *hdev)
3538 {
3539 	struct gaudi_device *gaudi = hdev->asic_specific;
3540 
3541 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3542 		return;
3543 
3544 	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545 	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 }
3553 
3554 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3555 {
3556 	u32 qman_offset;
3557 	int i;
3558 
3559 	if (hdev->asic_prop.fw_security_enabled)
3560 		return;
3561 
3562 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3563 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3564 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3565 
3566 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3567 	}
3568 
3569 	WREG32(mmMME0_QM_CGM_CFG, 0);
3570 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3571 	WREG32(mmMME2_QM_CGM_CFG, 0);
3572 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3573 
3574 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3575 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3576 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3577 
3578 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3579 	}
3580 }
3581 
3582 static void gaudi_enable_timestamp(struct hl_device *hdev)
3583 {
3584 	/* Disable the timestamp counter */
3585 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3586 
3587 	/* Zero the lower/upper parts of the 64-bit counter */
3588 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3589 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3590 
3591 	/* Enable the counter */
3592 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3593 }
3594 
/* Stop the device timestamp counter; counterpart of gaudi_enable_timestamp() */
static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
3600 
3601 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3602 {
3603 	u32 wait_timeout_ms;
3604 
3605 	if (hdev->pldm)
3606 		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3607 	else
3608 		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3609 
3610 	if (fw_reset)
3611 		goto skip_engines;
3612 
3613 	gaudi_stop_nic_qmans(hdev);
3614 	gaudi_stop_mme_qmans(hdev);
3615 	gaudi_stop_tpc_qmans(hdev);
3616 	gaudi_stop_hbm_dma_qmans(hdev);
3617 	gaudi_stop_pci_dma_qmans(hdev);
3618 
3619 	msleep(wait_timeout_ms);
3620 
3621 	gaudi_pci_dma_stall(hdev);
3622 	gaudi_hbm_dma_stall(hdev);
3623 	gaudi_tpc_stall(hdev);
3624 	gaudi_mme_stall(hdev);
3625 
3626 	msleep(wait_timeout_ms);
3627 
3628 	gaudi_disable_nic_qmans(hdev);
3629 	gaudi_disable_mme_qmans(hdev);
3630 	gaudi_disable_tpc_qmans(hdev);
3631 	gaudi_disable_hbm_dma_qmans(hdev);
3632 	gaudi_disable_pci_dma_qmans(hdev);
3633 
3634 	gaudi_disable_timestamp(hdev);
3635 
3636 skip_engines:
3637 	gaudi_disable_msi(hdev);
3638 }
3639 
/*
 * gaudi_mmu_init() - Initialize the device MMU.
 *
 * Programs the hop0 (page-table root) address for every ASID, configures
 * the MMU cache management page, invalidates the MMU cache and enables
 * the MMU. Does nothing if the MMU was already initialized.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	/* Each ASID gets its own hop0 table inside the page-tables area */
	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->dmmu.hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			return rc;
		}
	}

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);

	/* mem cache invalidation */
	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);

	/* Invalidate the whole MMU cache before enabling the MMU */
	rc = hl_mmu_invalidate_cache(hdev, true, 0);
	if (rc)
		return rc;

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	/* NOTE(review): magic hop configuration value - meaning not
	 * documented here, taken as-is from the H/W configuration
	 */
	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;
}
3688 
3689 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3690 {
3691 	void __iomem *dst;
3692 
3693 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3694 
3695 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3696 }
3697 
3698 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3699 {
3700 	void __iomem *dst;
3701 
3702 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3703 
3704 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3705 }
3706 
3707 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3708 {
3709 	struct dynamic_fw_load_mgr *dynamic_loader;
3710 	struct cpu_dyn_regs *dyn_regs;
3711 
3712 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3713 
3714 	/*
3715 	 * here we update initial values for few specific dynamic regs (as
3716 	 * before reading the first descriptor from FW those value has to be
3717 	 * hard-coded) in later stages of the protocol those values will be
3718 	 * updated automatically by reading the FW descriptor so data there
3719 	 * will always be up-to-date
3720 	 */
3721 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3722 	dyn_regs->kmd_msg_to_cpu =
3723 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3724 	dyn_regs->cpu_cmd_status_to_host =
3725 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3726 
3727 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3728 }
3729 
3730 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3731 {
3732 	struct static_fw_load_mgr *static_loader;
3733 
3734 	static_loader = &hdev->fw_loader.static_loader;
3735 
3736 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3738 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3739 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3740 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3741 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3742 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3743 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3744 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3745 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3746 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3747 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3748 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3749 			GAUDI_PLDM_RESET_WAIT_MSEC :
3750 			GAUDI_CPU_RESET_WAIT_MSEC;
3751 }
3752 
3753 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3754 {
3755 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3756 
3757 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3758 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3759 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3760 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3761 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3762 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3763 }
3764 
3765 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3766 {
3767 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3768 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3769 
3770 	/* fill common fields */
3771 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3772 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3773 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3774 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3775 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3776 	fw_loader->skip_bmc = !hdev->bmc_enable;
3777 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3778 	fw_loader->dram_bar_id = HBM_BAR_ID;
3779 
3780 	if (prop->dynamic_fw_load)
3781 		gaudi_init_dynamic_firmware_loader(hdev);
3782 	else
3783 		gaudi_init_static_firmware_loader(hdev);
3784 }
3785 
3786 static int gaudi_init_cpu(struct hl_device *hdev)
3787 {
3788 	struct gaudi_device *gaudi = hdev->asic_specific;
3789 	int rc;
3790 
3791 	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3792 		return 0;
3793 
3794 	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3795 		return 0;
3796 
3797 	/*
3798 	 * The device CPU works with 40 bits addresses.
3799 	 * This register sets the extension to 50 bits.
3800 	 */
3801 	if (!hdev->asic_prop.fw_security_enabled)
3802 		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3803 
3804 	rc = hl_fw_init_cpu(hdev);
3805 
3806 	if (rc)
3807 		return rc;
3808 
3809 	gaudi->hw_cap_initialized |= HW_CAP_CPU;
3810 
3811 	return 0;
3812 }
3813 
/*
 * gaudi_init_cpu_queues() - Make the device CPU aware of the host queues.
 * @hdev: habanalabs device structure.
 * @cpu_timeout: how long to wait (usec) for the device CPU to acknowledge.
 *
 * Publishes the bus addresses and sizes of the CPU PQ, the event queue and
 * the CPU-accessible memory region through the CPU_IF registers, signals
 * the device CPU and polls until it reports readiness.
 *
 * Return: 0 on success (or when CPU queues are disabled / already done),
 * -EIO on communication timeout.
 */
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, irq_handler_offset;
	struct hl_eq *eq;
	struct hl_hw_queue *cpu_pq =
			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	/* Publish the PQ, EQ and CQ bus addresses to the device CPU */
	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
			lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
			upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);

	/* With older F/W (no GIC) the IRQ register comes from the dynamic regs */
	irq_handler_offset = prop->gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);

	/* Poll every 1000 usec until the device CPU acks the queue init */
	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
3886 
/*
 * gaudi_pre_hw_init() - Early configuration done before the main H/W init:
 * verifies the device is responsive, secures the PCI-BAR access path (when
 * the driver owns security) and marks the H/W state as dirty.
 */
static void gaudi_pre_hw_init(struct hl_device *hdev)
{
	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	if (!hdev->asic_prop.fw_security_enabled) {
		/* Set the access through PCI bars (Linux driver only) as
		 * secured
		 */
		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));

		/* Perform read to flush the waiting writes to ensure
		 * configuration was set in the device
		 */
		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
	}

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
}
3914 
/*
 * gaudi_hw_init() - Bring up the device H/W.
 *
 * Performs the full init sequence: BAR mapping, device CPU (F/W) boot,
 * scramblers, golden registers, MMU, security, all QMANs, timestamp, MSI
 * and finally the CPU queues. The ordering of the calls is deliberate -
 * see the inline comments for the documented dependencies.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_hw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	gaudi_pre_hw_init(hdev);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* In case the clock gating was enabled in preboot we need to disable
	 * it here before touching the MME/TPC registers.
	 */
	gaudi_disable_clock_gating(hdev);

	/* SRAM scrambler must be initialized after CPU is running from HBM */
	gaudi_init_scrambler_sram(hdev);

	/* This is here just in case we are working without CPU */
	gaudi_init_scrambler_hbm(hdev);

	gaudi_init_golden_registers(hdev);

	rc = gaudi_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi_init_security(hdev);

	gaudi_init_pci_dma_qmans(hdev);

	gaudi_init_hbm_dma_qmans(hdev);

	gaudi_init_mme_qmans(hdev);

	gaudi_init_tpc_qmans(hdev);

	gaudi_init_nic_qmans(hdev);

	gaudi_enable_timestamp(hdev);

	/* MSI must be enabled before CPU queues and NIC are initialized */
	rc = gaudi_enable_msi(hdev);
	if (rc)
		goto disable_queues;

	/* must be called after MSI was enabled */
	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
			rc);
		goto disable_msi;
	}

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_msi:
	gaudi_disable_msi(hdev);
disable_queues:
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	return rc;
}
4002 
/*
 * gaudi_hw_fini() - Hard-reset the device.
 * @hdev: habanalabs device structure.
 * @hard_reset: must be true - GAUDI doesn't support soft reset.
 * @fw_reset: true when the F/W itself performs the reset; the driver then
 *            only waits for it to complete.
 *
 * Either asks the F/W to reset/halt, or - when the driver owns the reset -
 * configures the reset registers and triggers SW_ALL_RST itself. In all
 * cases waits for the reset to de-assert and clears the initialized-H/W
 * bookkeeping.
 *
 * Return: 0 on success, -ETIMEDOUT if the device failed to reset.
 */
static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool driver_performs_reset;

	if (!hard_reset) {
		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
		return 0;
	}

	if (hdev->pldm) {
		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
	}

	if (fw_reset) {
		dev_dbg(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);

		goto skip_reset;
	}

	/* Driver resets only when security is off AND F/W doesn't do it */
	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
					!hdev->asic_prop.hard_reset_done_by_fw);

	/* Set device to handle FLR by H/W as we will put the device CPU to
	 * halt mode
	 */
	if (driver_performs_reset)
		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	/* If linux is loaded in the device CPU we need to communicate with it
	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
	 * registers in case of old F/Ws
	 */
	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_halt_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);

		/* This is a hail-mary attempt to revive the card in the small chance that the
		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
		 * In that case, triggering reset through GIC won't help. We need to trigger the
		 * reset as if Linux wasn't loaded.
		 *
		 * We do it only if the reset cause was HB, because that would be the indication
		 * of such an event.
		 *
		 * In case watchdog hasn't expired but we still got HB, then this won't do any
		 * damage.
		 */
		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
			if (hdev->asic_prop.hard_reset_done_by_fw)
				hl_fw_ask_hard_reset_without_linux(hdev);
			else
				hl_fw_ask_halt_machine_without_linux(hdev);
		}
	} else {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}

	if (driver_performs_reset) {

		/* Configure the reset registers. Must be done as early as
		 * possible in case we fail during H/W initialization
		 */
		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
						(CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_TPC_7_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
						(CFG_RST_H_HBM_MASK |
						CFG_RST_H_TPC_7_MASK |
						CFG_RST_H_NIC_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_CPU_MASK |
						CFG_RST_H_MMU_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
						(CFG_RST_L_IF_MASK |
						CFG_RST_L_PSOC_MASK |
						CFG_RST_L_TPC_MASK));

		msleep(cpu_timeout_ms);

		/* Tell ASIC not to re-initialize PCIe */
		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);

		/* Restart BTL/BLR upon hard-reset */
		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);

		dev_dbg(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		dev_dbg(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);
	}

skip_reset:
	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. Need to wait until the reset is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
		return -ETIMEDOUT;
	}

	if (gaudi) {
		/* Clear the initialized-H/W bookkeeping so the next hw_init
		 * re-configures the listed blocks from scratch
		 */
		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
						HW_CAP_HBM_SCRAMBLER);

		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));

		hdev->device_cpu_is_halted = false;
	}
	return 0;
}
4152 
static int gaudi_suspend(struct hl_device *hdev)
{
	/* Ask the device CPU to stop accessing the host over PCI */
	return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}
4157 
static int gaudi_resume(struct hl_device *hdev)
{
	/* NOTE(review): presumably the iATU configuration is lost across
	 * suspend and must be re-applied - confirm against PM flow
	 */
	return gaudi_init_iatu(hdev);
}
4162 
4163 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4164 			void *cpu_addr, dma_addr_t dma_addr, size_t size)
4165 {
4166 	int rc;
4167 
4168 	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4169 			VM_DONTCOPY | VM_NORESERVE);
4170 
4171 #ifdef _HAS_DMA_MMAP_COHERENT
4172 	/*
4173 	 * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
4174 	 * so vm_insert_page() can handle it safely. Without this, the kernel
4175 	 * may BUG_ON due to VM_PFNMAP.
4176 	 */
4177 	if (is_vmalloc_addr(cpu_addr))
4178 		vm_flags_set(vma, VM_MIXEDMAP);
4179 
4180 	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4181 				(dma_addr - HOST_PHYS_BASE), size);
4182 	if (rc)
4183 		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4184 #else
4185 
4186 	rc = remap_pfn_range(vma, vma->vm_start,
4187 				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
4188 				size, vma->vm_page_prot);
4189 	if (rc)
4190 		dev_err(hdev->dev, "remap_pfn_range error %d", rc);
4191 
4192  #endif
4193 
4194 
4195 	return rc;
4196 }
4197 
4198 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4199 {
4200 	struct cpu_dyn_regs *dyn_regs =
4201 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4202 	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4203 	struct gaudi_device *gaudi = hdev->asic_specific;
4204 	bool invalid_queue = false;
4205 	int dma_id;
4206 
4207 	switch (hw_queue_id) {
4208 	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4209 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4210 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4211 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4212 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4213 		break;
4214 
4215 	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4216 		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4217 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4218 		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4219 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4220 		break;
4221 
4222 	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4223 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4224 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4225 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4226 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4227 		break;
4228 
4229 	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4230 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4231 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4232 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4233 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4234 		break;
4235 
4236 	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4237 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4238 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4239 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4240 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4241 		break;
4242 
4243 	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4244 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4245 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4246 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4247 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4248 		break;
4249 
4250 	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4251 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4252 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4253 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4254 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4255 		break;
4256 
4257 	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4258 		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4259 		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4260 		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4261 		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4262 		break;
4263 
4264 	case GAUDI_QUEUE_ID_CPU_PQ:
4265 		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4266 			db_reg_offset = mmCPU_IF_PF_PQ_PI;
4267 		else
4268 			invalid_queue = true;
4269 		break;
4270 
4271 	case GAUDI_QUEUE_ID_MME_0_0:
4272 		db_reg_offset = mmMME2_QM_PQ_PI_0;
4273 		break;
4274 
4275 	case GAUDI_QUEUE_ID_MME_0_1:
4276 		db_reg_offset = mmMME2_QM_PQ_PI_1;
4277 		break;
4278 
4279 	case GAUDI_QUEUE_ID_MME_0_2:
4280 		db_reg_offset = mmMME2_QM_PQ_PI_2;
4281 		break;
4282 
4283 	case GAUDI_QUEUE_ID_MME_0_3:
4284 		db_reg_offset = mmMME2_QM_PQ_PI_3;
4285 		break;
4286 
4287 	case GAUDI_QUEUE_ID_MME_1_0:
4288 		db_reg_offset = mmMME0_QM_PQ_PI_0;
4289 		break;
4290 
4291 	case GAUDI_QUEUE_ID_MME_1_1:
4292 		db_reg_offset = mmMME0_QM_PQ_PI_1;
4293 		break;
4294 
4295 	case GAUDI_QUEUE_ID_MME_1_2:
4296 		db_reg_offset = mmMME0_QM_PQ_PI_2;
4297 		break;
4298 
4299 	case GAUDI_QUEUE_ID_MME_1_3:
4300 		db_reg_offset = mmMME0_QM_PQ_PI_3;
4301 		break;
4302 
4303 	case GAUDI_QUEUE_ID_TPC_0_0:
4304 		db_reg_offset = mmTPC0_QM_PQ_PI_0;
4305 		break;
4306 
4307 	case GAUDI_QUEUE_ID_TPC_0_1:
4308 		db_reg_offset = mmTPC0_QM_PQ_PI_1;
4309 		break;
4310 
4311 	case GAUDI_QUEUE_ID_TPC_0_2:
4312 		db_reg_offset = mmTPC0_QM_PQ_PI_2;
4313 		break;
4314 
4315 	case GAUDI_QUEUE_ID_TPC_0_3:
4316 		db_reg_offset = mmTPC0_QM_PQ_PI_3;
4317 		break;
4318 
4319 	case GAUDI_QUEUE_ID_TPC_1_0:
4320 		db_reg_offset = mmTPC1_QM_PQ_PI_0;
4321 		break;
4322 
4323 	case GAUDI_QUEUE_ID_TPC_1_1:
4324 		db_reg_offset = mmTPC1_QM_PQ_PI_1;
4325 		break;
4326 
4327 	case GAUDI_QUEUE_ID_TPC_1_2:
4328 		db_reg_offset = mmTPC1_QM_PQ_PI_2;
4329 		break;
4330 
4331 	case GAUDI_QUEUE_ID_TPC_1_3:
4332 		db_reg_offset = mmTPC1_QM_PQ_PI_3;
4333 		break;
4334 
4335 	case GAUDI_QUEUE_ID_TPC_2_0:
4336 		db_reg_offset = mmTPC2_QM_PQ_PI_0;
4337 		break;
4338 
4339 	case GAUDI_QUEUE_ID_TPC_2_1:
4340 		db_reg_offset = mmTPC2_QM_PQ_PI_1;
4341 		break;
4342 
4343 	case GAUDI_QUEUE_ID_TPC_2_2:
4344 		db_reg_offset = mmTPC2_QM_PQ_PI_2;
4345 		break;
4346 
4347 	case GAUDI_QUEUE_ID_TPC_2_3:
4348 		db_reg_offset = mmTPC2_QM_PQ_PI_3;
4349 		break;
4350 
4351 	case GAUDI_QUEUE_ID_TPC_3_0:
4352 		db_reg_offset = mmTPC3_QM_PQ_PI_0;
4353 		break;
4354 
4355 	case GAUDI_QUEUE_ID_TPC_3_1:
4356 		db_reg_offset = mmTPC3_QM_PQ_PI_1;
4357 		break;
4358 
4359 	case GAUDI_QUEUE_ID_TPC_3_2:
4360 		db_reg_offset = mmTPC3_QM_PQ_PI_2;
4361 		break;
4362 
4363 	case GAUDI_QUEUE_ID_TPC_3_3:
4364 		db_reg_offset = mmTPC3_QM_PQ_PI_3;
4365 		break;
4366 
4367 	case GAUDI_QUEUE_ID_TPC_4_0:
4368 		db_reg_offset = mmTPC4_QM_PQ_PI_0;
4369 		break;
4370 
4371 	case GAUDI_QUEUE_ID_TPC_4_1:
4372 		db_reg_offset = mmTPC4_QM_PQ_PI_1;
4373 		break;
4374 
4375 	case GAUDI_QUEUE_ID_TPC_4_2:
4376 		db_reg_offset = mmTPC4_QM_PQ_PI_2;
4377 		break;
4378 
4379 	case GAUDI_QUEUE_ID_TPC_4_3:
4380 		db_reg_offset = mmTPC4_QM_PQ_PI_3;
4381 		break;
4382 
4383 	case GAUDI_QUEUE_ID_TPC_5_0:
4384 		db_reg_offset = mmTPC5_QM_PQ_PI_0;
4385 		break;
4386 
4387 	case GAUDI_QUEUE_ID_TPC_5_1:
4388 		db_reg_offset = mmTPC5_QM_PQ_PI_1;
4389 		break;
4390 
4391 	case GAUDI_QUEUE_ID_TPC_5_2:
4392 		db_reg_offset = mmTPC5_QM_PQ_PI_2;
4393 		break;
4394 
4395 	case GAUDI_QUEUE_ID_TPC_5_3:
4396 		db_reg_offset = mmTPC5_QM_PQ_PI_3;
4397 		break;
4398 
4399 	case GAUDI_QUEUE_ID_TPC_6_0:
4400 		db_reg_offset = mmTPC6_QM_PQ_PI_0;
4401 		break;
4402 
4403 	case GAUDI_QUEUE_ID_TPC_6_1:
4404 		db_reg_offset = mmTPC6_QM_PQ_PI_1;
4405 		break;
4406 
4407 	case GAUDI_QUEUE_ID_TPC_6_2:
4408 		db_reg_offset = mmTPC6_QM_PQ_PI_2;
4409 		break;
4410 
4411 	case GAUDI_QUEUE_ID_TPC_6_3:
4412 		db_reg_offset = mmTPC6_QM_PQ_PI_3;
4413 		break;
4414 
4415 	case GAUDI_QUEUE_ID_TPC_7_0:
4416 		db_reg_offset = mmTPC7_QM_PQ_PI_0;
4417 		break;
4418 
4419 	case GAUDI_QUEUE_ID_TPC_7_1:
4420 		db_reg_offset = mmTPC7_QM_PQ_PI_1;
4421 		break;
4422 
4423 	case GAUDI_QUEUE_ID_TPC_7_2:
4424 		db_reg_offset = mmTPC7_QM_PQ_PI_2;
4425 		break;
4426 
4427 	case GAUDI_QUEUE_ID_TPC_7_3:
4428 		db_reg_offset = mmTPC7_QM_PQ_PI_3;
4429 		break;
4430 
4431 	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4432 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4433 			invalid_queue = true;
4434 
4435 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4436 		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4437 		break;
4438 
4439 	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4440 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4441 			invalid_queue = true;
4442 
4443 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4444 		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4445 		break;
4446 
4447 	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4448 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4449 			invalid_queue = true;
4450 
4451 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4452 		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4453 		break;
4454 
4455 	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4456 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4457 			invalid_queue = true;
4458 
4459 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4460 		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4461 		break;
4462 
4463 	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4464 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4465 			invalid_queue = true;
4466 
4467 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4468 		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4469 		break;
4470 
4471 	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4472 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4473 			invalid_queue = true;
4474 
4475 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4476 		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4477 		break;
4478 
4479 	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4480 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4481 			invalid_queue = true;
4482 
4483 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4484 		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4485 		break;
4486 
4487 	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4488 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4489 			invalid_queue = true;
4490 
4491 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4492 		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4493 		break;
4494 
4495 	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4496 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4497 			invalid_queue = true;
4498 
4499 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4500 		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4501 		break;
4502 
4503 	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4504 		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4505 			invalid_queue = true;
4506 
4507 		q_off = ((hw_queue_id - 1) & 0x3) * 4;
4508 		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4509 		break;
4510 
4511 	default:
4512 		invalid_queue = true;
4513 	}
4514 
4515 	if (invalid_queue) {
4516 		/* Should never get here */
4517 		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4518 			hw_queue_id);
4519 		return;
4520 	}
4521 
4522 	db_value = pi;
4523 
4524 	/* ring the doorbell */
4525 	WREG32(db_reg_offset, db_value);
4526 
4527 	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4528 		/* make sure device CPU will read latest data from host */
4529 		mb();
4530 
4531 		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4532 				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4533 				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4534 
4535 		WREG32(irq_handler_offset,
4536 			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4537 	}
4538 }
4539 
4540 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4541 				struct hl_bd *bd)
4542 {
4543 	__le64 *pbd = (__le64 *) bd;
4544 
4545 	/* The QMANs are on the host memory so a simple copy suffice */
4546 	pqe[0] = pbd[0];
4547 	pqe[1] = pbd[1];
4548 }
4549 
4550 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4551 					dma_addr_t *dma_handle, gfp_t flags)
4552 {
4553 	void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4554 						dma_handle, flags);
4555 
4556 	/* Shift to the device's base physical address of host memory */
4557 	if (kernel_addr)
4558 		*dma_handle += HOST_PHYS_BASE;
4559 
4560 	return kernel_addr;
4561 }
4562 
4563 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4564 		void *cpu_addr, dma_addr_t dma_handle)
4565 {
4566 	/* Cancel the device's base physical address of host memory */
4567 	dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4568 
4569 	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4570 }
4571 
/*
 * gaudi_scrub_device_dram() - fill the user DRAM (HBM) region with a value.
 * @hdev: habanalabs device structure.
 * @val: 64-bit pattern to write over the DRAM user region.
 *
 * Splits the region [dram_user_base_address, dram_end_address) into chunks of
 * up to 2GB and programs each DMA core (in MEM_SET mode) with one chunk, so
 * all channels scrub in parallel. After a batch is committed, polls every
 * channel's status register until it goes non-busy.
 *
 * Return: 0 on success, -EIO if any DMA channel did not finish in time.
 */
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = prop->dram_user_base_address;
	u32 chunk_size, busy;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		/* Hand one chunk to each DMA channel */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			/* Last chunk may be smaller than 2GB */
			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			/* SRC holds the memset pattern, DST the chunk base */
			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
					lower_32_bits(val));
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
					upper_32_bits(val));
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
						lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
						upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
					chunk_size);
			/* Kick the transfer: linear mode + memset mode */
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			if (cur_addr == prop->dram_end_address)
				break;
		}

		/* Wait for the whole batch to complete before the next one */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				busy,
				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}
4632 
/*
 * gaudi_scrub_device_mem() - scrub SRAM and HBM with the configured value.
 * @hdev: habanalabs device structure.
 *
 * No-op unless memory scrubbing was enabled via hdev->memory_scrub. Waits
 * (bounded by HBM_SCRUBBING_TIMEOUT_US) for the device to become idle before
 * touching memory, then scrubs the SRAM user region followed by the DRAM
 * user region with hdev->memory_scrub_val.
 *
 * Return: 0 on success, -ETIMEDOUT if the device never went idle, or the
 * error code of the failing scrub step.
 */
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
	u64 addr, size, val = hdev->memory_scrub_val;
	ktime_t timeout;
	int rc = 0;

	if (!hdev->memory_scrub)
		return 0;

	/* Scrubbing must not race engines that may still access memory */
	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_err(hdev->dev, "waiting for idle timeout\n");
			return -ETIMEDOUT;
		}
		usleep_range((1000 >> 2) + 1, 1000);
	}

	/* Scrub SRAM (only a small window on simulation/pldm platforms) */
	addr = prop->sram_user_base_address;
	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;

	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
		return rc;
	}

	/* Scrub HBM using all DMA channels in parallel */
	rc = gaudi_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
		return rc;
	}

	return 0;
}
4674 
4675 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4676 				u32 queue_id, dma_addr_t *dma_handle,
4677 				u16 *queue_len)
4678 {
4679 	struct gaudi_device *gaudi = hdev->asic_specific;
4680 	struct gaudi_internal_qman_info *q;
4681 
4682 	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4683 			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4684 		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4685 		return NULL;
4686 	}
4687 
4688 	q = &gaudi->internal_qmans[queue_id];
4689 	*dma_handle = q->pq_dma_addr;
4690 	*queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4691 
4692 	return q->pq_kernel_addr;
4693 }
4694 
4695 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4696 				u16 len, u32 timeout, u64 *result)
4697 {
4698 	struct gaudi_device *gaudi = hdev->asic_specific;
4699 
4700 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4701 		if (result)
4702 			*result = 0;
4703 		return 0;
4704 	}
4705 
4706 	if (!timeout)
4707 		timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4708 
4709 	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4710 						timeout, result);
4711 }
4712 
/*
 * gaudi_test_queue() - sanity-test one external H/W queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: id of the queue to test.
 *
 * Allocates a fence word and a MSG_PROT packet from the DMA pool, sends the
 * packet through the queue and polls the fence word until the hardware
 * writes the expected value into it.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO if the fence
 * value was not observed in time, or the error from sending the packet.
 */
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp, timeout_usec;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	/* Simulation platform is much slower, allow a longer wait */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;

	fence_val = GAUDI_QMAN0_FENCE_VAL;

	/* Fence word the MSG_PROT packet will write to */
	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
						&pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	/* MSG_PROT with engine-barrier and message-barrier bits set */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	/* Wait for the hardware to write the fence value */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, timeout_usec, true);

	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
4785 
4786 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4787 {
4788 	struct gaudi_device *gaudi = hdev->asic_specific;
4789 
4790 	/*
4791 	 * check capability here as send_cpu_message() won't update the result
4792 	 * value if no capability
4793 	 */
4794 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4795 		return 0;
4796 
4797 	return hl_fw_test_cpu_queue(hdev);
4798 }
4799 
4800 static int gaudi_test_queues(struct hl_device *hdev)
4801 {
4802 	int i, rc, ret_val = 0;
4803 
4804 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4805 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4806 			rc = gaudi_test_queue(hdev, i);
4807 			if (rc)
4808 				ret_val = -EINVAL;
4809 		}
4810 	}
4811 
4812 	rc = gaudi_test_cpu_queue(hdev);
4813 	if (rc)
4814 		ret_val = -EINVAL;
4815 
4816 	return ret_val;
4817 }
4818 
4819 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4820 		gfp_t mem_flags, dma_addr_t *dma_handle)
4821 {
4822 	void *kernel_addr;
4823 
4824 	if (size > GAUDI_DMA_POOL_BLK_SIZE)
4825 		return NULL;
4826 
4827 	kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4828 
4829 	/* Shift to the device's base physical address of host memory */
4830 	if (kernel_addr)
4831 		*dma_handle += HOST_PHYS_BASE;
4832 
4833 	return kernel_addr;
4834 }
4835 
4836 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4837 			dma_addr_t dma_addr)
4838 {
4839 	/* Cancel the device's base physical address of host memory */
4840 	dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4841 
4842 	dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4843 }
4844 
/* Thin ASIC-interface wrapper around the common CPU-accessible pool alloc */
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
					size_t size, dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
4850 
/* Thin ASIC-interface wrapper around the common CPU-accessible pool free */
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
4856 
/*
 * gaudi_get_dma_desc_list_size() - size of the LIN_DMA packets needed for an
 *                                  sg table.
 * @hdev: habanalabs device structure.
 * @sgt: DMA-mapped scatter-gather table.
 *
 * Walks the DMA-mapped entries and counts how many descriptors are required,
 * merging neighbouring entries that are DMA-contiguous as long as the merged
 * length stays within DMA_MAX_TRANSFER_SIZE.
 *
 * Return: total byte size of the LIN_DMA packets that will replace the
 * user's single DMA packet.
 */
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		/* A zero length marks the end of the mapped entries */
		if (len == 0)
			break;

		/* Fold following entries that are physically contiguous */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
4896 
/*
 * gaudi_pin_memory_before_cs() - pin and DMA-map a user buffer referenced by
 *                                a DMA packet.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser context.
 * @user_dma_pkt: the user's LIN_DMA packet (provides the transfer size).
 * @addr: host virtual address referenced by the packet.
 * @dir: DMA direction of the transfer.
 *
 * If the range is already pinned for this job, only accounts for the patched
 * CB size. Otherwise pins the pages, adds them to the job's userptr list
 * (which owns the allocation from then on) and DMA-maps them.
 *
 * Return: 0 on success, -ENOMEM or the pin/map error code on failure.
 */
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	/* From here the job's userptr list owns the userptr object */
	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	/* Account for the LIN_DMA packets the patched CB will need */
	parser->patched_cb_size +=
			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}
4942 
4943 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4944 				struct hl_cs_parser *parser,
4945 				struct packet_lin_dma *user_dma_pkt,
4946 				bool src_in_host)
4947 {
4948 	enum dma_data_direction dir;
4949 	bool skip_host_mem_pin = false, user_memset;
4950 	u64 addr;
4951 	int rc = 0;
4952 
4953 	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4954 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4955 			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4956 
4957 	if (src_in_host) {
4958 		if (user_memset)
4959 			skip_host_mem_pin = true;
4960 
4961 		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4962 		dir = DMA_TO_DEVICE;
4963 		addr = le64_to_cpu(user_dma_pkt->src_addr);
4964 	} else {
4965 		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4966 		dir = DMA_FROM_DEVICE;
4967 		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4968 				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4969 				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4970 	}
4971 
4972 	if (skip_host_mem_pin)
4973 		parser->patched_cb_size += sizeof(*user_dma_pkt);
4974 	else
4975 		rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4976 						addr, dir);
4977 
4978 	return rc;
4979 }
4980 
4981 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4982 				struct hl_cs_parser *parser,
4983 				struct packet_lin_dma *user_dma_pkt)
4984 {
4985 	bool src_in_host = false;
4986 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4987 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4988 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4989 
4990 	dev_dbg(hdev->dev, "DMA packet details:\n");
4991 	dev_dbg(hdev->dev, "source == 0x%llx\n",
4992 				le64_to_cpu(user_dma_pkt->src_addr));
4993 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4994 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4995 
4996 	/*
4997 	 * Special handling for DMA with size 0. Bypass all validations
4998 	 * because no transactions will be done except for WR_COMP, which
4999 	 * is not a security issue
5000 	 */
5001 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5002 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5003 		return 0;
5004 	}
5005 
5006 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5007 		src_in_host = true;
5008 
5009 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5010 						src_in_host);
5011 }
5012 
5013 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5014 					struct hl_cs_parser *parser,
5015 					struct packet_load_and_exe *user_pkt)
5016 {
5017 	u32 cfg;
5018 
5019 	cfg = le32_to_cpu(user_pkt->cfg);
5020 
5021 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5022 		dev_err(hdev->dev,
5023 			"User not allowed to use Load and Execute\n");
5024 		return -EPERM;
5025 	}
5026 
5027 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5028 
5029 	return 0;
5030 }
5031 
/*
 * gaudi_validate_cb() - security-validate a user CB and compute the size of
 *                       the patched CB that will replace it.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser context.
 * @is_mmu: true when the device MMU is enabled (DMA packets need no host
 *          pinning and are accounted as-is).
 *
 * Walks the CB packet by packet, rejecting packet types that are privileged
 * for user submissions and accumulating parser->patched_cb_size.
 *
 * Return: 0 on success, -EINVAL for malformed CBs, -EPERM for privileged
 * packets.
 */
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct gaudi_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* The packet must fit entirely inside the CB */
		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		/* Privileged packets are rejected outright */
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_LOAD_AND_EXE:
			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
				(struct packet_load_and_exe *) user_pkt);
			break;

		case PACKET_LIN_DMA:
			parser->contains_dma_pkt = true;
			/* With MMU on, no pinning - the packet passes as-is */
			if (is_mmu)
				parser->patched_cb_size += pkt_size;
			else
				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		/* Harmless packets are copied to the patched CB unchanged */
		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for the driver-added
	 * packets:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
			parser->patched_cb_size);

	return rc;
}
5138 
/*
 * gaudi_patch_dma_packet() - expand one user LIN_DMA packet into packets
 *                            that target the pinned physical pages.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser context.
 * @user_dma_pkt: the user's original LIN_DMA packet.
 * @new_dma_pkt: destination in the patched CB for the generated packet(s).
 * @new_dma_pkt_size: out - total byte size of the generated packets.
 *
 * Walks the sg table that was mapped for the host buffer and emits one
 * LIN_DMA packet per merged DMA-contiguous chunk. WR_COMP is disabled on all
 * generated packets except the last, which restores the user's setting, and
 * the engine-barrier bit is kept only on the first packet.
 *
 * Return: 0 on success, -EFAULT if the buffer was not pinned or the sg table
 * yielded no entries.
 */
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool src_in_host = false;
	bool skip_host_mem_pin = false;
	bool user_memset;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	/* Only the lowest DMA queue ids read their source from the host */
	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		/* A memset reads no host memory, so nothing was pinned */
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	/* The buffer must have been pinned during the validation pass */
	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	/* Host-to-device memset needs no patching - copy the packet as-is */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	/* Remember the user's WR_COMP setting to restore on the last packet */
	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		/* A zero length marks the end of the mapped entries */
		if (len == 0)
			break;

		/* Fold following entries that are physically contiguous */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		/* Keep the engine barrier only on the first generated packet */
		if (likely(dma_desc_cnt))
			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32(len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		/* A memset keeps writing the same device address */
		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - wrcomp must be as user set it */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
5257 
/*
 * gaudi_patch_cb() - copy a validated user CB into the patched CB, expanding
 *                    LIN_DMA packets to target pinned host pages.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser context.
 *
 * Must walk the CB in lockstep with gaudi_validate_cb() (no-MMU path): the
 * same packets are rejected with the same errors, and all other packets are
 * copied verbatim, so that the patched size computed during validation
 * matches what is written here.
 *
 * Return: 0 on success, -EINVAL/-EPERM on malformed or privileged packets.
 */
static int gaudi_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct gaudi_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* The packet must fit entirely inside the CB */
		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		/* DMA packets are expanded to per-sg-chunk packets */
		case PACKET_LIN_DMA:
			rc = gaudi_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		/* Privileged packets are rejected outright */
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		/* Harmless packets are copied unchanged */
		case PACKET_WREG_32:
		case PACKET_WREG_BULK:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
		case PACKET_LOAD_AND_EXE:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
5347 
/*
 * gaudi_parse_cb_mmu() - parse a user CB for an external queue when the
 *                        device MMU is enabled.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser context.
 *
 * Allocates a patched CB sized for the user CB plus the driver's trailing
 * completion packets, copies the user CB into it, then validates the copy.
 * With the MMU on no DMA patching is needed, so the copy is used as-is.
 *
 * On success, parser->patched_cb holds one reference for the job. The CB
 * handle is always destroyed before returning so it is removed from the
 * manager's idr either way.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for the driver-added
	 * packets:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size = parser->user_cb_size +
				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
	else
		parser->patched_cb_size = parser->user_cb_size;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * We are protected from overflow because the check
	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
	 *
	 * There is no option to reach here without going through that check because:
	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
	 *    an external queue.
	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* Validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = gaudi_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation must re-derive exactly the size we allocated */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

	return rc;
}
5432 
/*
 * gaudi_parse_cb_no_mmu() - parse a user CB for an external queue when the
 *                           device MMU is disabled.
 * @hdev: habanalabs device structure.
 * @parser: command submission parser context.
 *
 * Validates the user CB (which also pins referenced host memory and computes
 * the patched size), allocates the patched CB and fills it via
 * gaudi_patch_cb(). On any failure, the job's pinned userptr list is torn
 * down. The CB handle is always destroyed so it is removed from the
 * manager's idr; the job keeps its own reference on success.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	int rc;

	rc = gaudi_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	rc = gaudi_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}
5480 
5481 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5482 					struct hl_cs_parser *parser)
5483 {
5484 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5485 	struct gaudi_device *gaudi = hdev->asic_specific;
5486 	u32 nic_queue_offset, nic_mask_q_id;
5487 
5488 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5489 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5490 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5491 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5492 
5493 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5494 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5495 			return -EINVAL;
5496 		}
5497 	}
5498 
5499 	/* For internal queue jobs just check if CB address is valid */
5500 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5501 					parser->user_cb_size,
5502 					asic_prop->sram_user_base_address,
5503 					asic_prop->sram_end_address))
5504 		return 0;
5505 
5506 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5507 					parser->user_cb_size,
5508 					asic_prop->dram_user_base_address,
5509 					asic_prop->dram_end_address))
5510 		return 0;
5511 
5512 	/* PMMU and HPMMU addresses are equal, check only one of them */
5513 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5514 					parser->user_cb_size,
5515 					asic_prop->pmmu.start_addr,
5516 					asic_prop->pmmu.end_addr))
5517 		return 0;
5518 
5519 	dev_err(hdev->dev,
5520 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5521 		parser->user_cb, parser->user_cb_size);
5522 
5523 	return -EFAULT;
5524 }
5525 
5526 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5527 {
5528 	struct gaudi_device *gaudi = hdev->asic_specific;
5529 
5530 	if (parser->queue_type == QUEUE_TYPE_INT)
5531 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5532 
5533 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5534 		return gaudi_parse_cb_mmu(hdev, parser);
5535 	else
5536 		return gaudi_parse_cb_no_mmu(hdev, parser);
5537 }
5538 
5539 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5540 				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5541 				u32 msi_vec, bool eb)
5542 {
5543 	struct packet_msg_prot *cq_pkt;
5544 	struct packet_nop *cq_padding;
5545 	u64 msi_addr;
5546 	u32 tmp;
5547 
5548 	cq_padding = kernel_address + original_len;
5549 	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5550 
5551 	while ((void *)cq_padding < (void *)cq_pkt) {
5552 		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5553 		cq_padding++;
5554 	}
5555 
5556 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5557 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5558 
5559 	if (eb)
5560 		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5561 
5562 	cq_pkt->ctl = cpu_to_le32(tmp);
5563 	cq_pkt->value = cpu_to_le32(cq_val);
5564 	cq_pkt->addr = cpu_to_le64(cq_addr);
5565 
5566 	cq_pkt++;
5567 
5568 	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5569 	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5570 	cq_pkt->ctl = cpu_to_le32(tmp);
5571 	cq_pkt->value = cpu_to_le32(1);
5572 	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5573 	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5574 }
5575 
/* Publish the event-queue consumer index so the device CPU can reuse slots */
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
5580 
5581 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5582 					u32 size, u64 val)
5583 {
5584 	struct packet_lin_dma *lin_dma_pkt;
5585 	struct hl_cs_job *job;
5586 	u32 cb_size, ctl, err_cause;
5587 	struct hl_cb *cb;
5588 	int rc;
5589 
5590 	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5591 	if (!cb)
5592 		return -EFAULT;
5593 
5594 	lin_dma_pkt = cb->kernel_address;
5595 	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5596 	cb_size = sizeof(*lin_dma_pkt);
5597 
5598 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5599 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5600 	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5601 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5602 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5603 
5604 	lin_dma_pkt->ctl = cpu_to_le32(ctl);
5605 	lin_dma_pkt->src_addr = cpu_to_le64(val);
5606 	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5607 	lin_dma_pkt->tsize = cpu_to_le32(size);
5608 
5609 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5610 	if (!job) {
5611 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5612 		rc = -ENOMEM;
5613 		goto release_cb;
5614 	}
5615 
5616 	/* Verify DMA is OK */
5617 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5618 	if (err_cause && !hdev->init_done) {
5619 		dev_dbg(hdev->dev,
5620 			"Clearing DMA0 engine from errors (cause 0x%x)\n",
5621 			err_cause);
5622 		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5623 	}
5624 
5625 	job->id = 0;
5626 	job->user_cb = cb;
5627 	atomic_inc(&job->user_cb->cs_cnt);
5628 	job->user_cb_size = cb_size;
5629 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5630 	job->patched_cb = job->user_cb;
5631 	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5632 
5633 	hl_debugfs_add_job(hdev, job);
5634 
5635 	rc = gaudi_send_job_on_qman0(hdev, job);
5636 	hl_debugfs_remove_job(hdev, job);
5637 	kfree(job);
5638 	atomic_dec(&cb->cs_cnt);
5639 
5640 	/* Verify DMA is OK */
5641 	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5642 	if (err_cause) {
5643 		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5644 		rc = -EIO;
5645 		if (!hdev->init_done) {
5646 			dev_dbg(hdev->dev,
5647 				"Clearing DMA0 engine from errors (cause 0x%x)\n",
5648 				err_cause);
5649 			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5650 		}
5651 	}
5652 
5653 release_cb:
5654 	hl_cb_put(cb);
5655 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5656 
5657 	return rc;
5658 }
5659 
5660 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5661 					u32 num_regs, u32 val)
5662 {
5663 	struct packet_msg_long *pkt;
5664 	struct hl_cs_job *job;
5665 	u32 cb_size, ctl;
5666 	struct hl_cb *cb;
5667 	int i, rc;
5668 
5669 	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5670 
5671 	if (cb_size > SZ_2M) {
5672 		dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5673 		return -ENOMEM;
5674 	}
5675 
5676 	cb = hl_cb_kernel_create(hdev, cb_size, false);
5677 	if (!cb)
5678 		return -EFAULT;
5679 
5680 	pkt = cb->kernel_address;
5681 
5682 	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5683 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5684 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5685 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5686 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5687 
5688 	for (i = 0; i < num_regs ; i++, pkt++) {
5689 		pkt->ctl = cpu_to_le32(ctl);
5690 		pkt->value = cpu_to_le32(val);
5691 		pkt->addr = cpu_to_le64(reg_base + (i * 4));
5692 	}
5693 
5694 	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5695 	if (!job) {
5696 		dev_err(hdev->dev, "Failed to allocate a new job\n");
5697 		rc = -ENOMEM;
5698 		goto release_cb;
5699 	}
5700 
5701 	job->id = 0;
5702 	job->user_cb = cb;
5703 	atomic_inc(&job->user_cb->cs_cnt);
5704 	job->user_cb_size = cb_size;
5705 	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5706 	job->patched_cb = job->user_cb;
5707 	job->job_cb_size = cb_size;
5708 
5709 	hl_debugfs_add_job(hdev, job);
5710 
5711 	rc = gaudi_send_job_on_qman0(hdev, job);
5712 	hl_debugfs_remove_job(hdev, job);
5713 	kfree(job);
5714 	atomic_dec(&cb->cs_cnt);
5715 
5716 release_cb:
5717 	hl_cb_put(cb);
5718 	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5719 
5720 	return rc;
5721 }
5722 
5723 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5724 {
5725 	u64 base_addr;
5726 	u32 num_regs;
5727 	int rc;
5728 
5729 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5730 	num_regs = NUM_OF_SOB_IN_BLOCK;
5731 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5732 	if (rc) {
5733 		dev_err(hdev->dev, "failed resetting SM registers");
5734 		return -ENOMEM;
5735 	}
5736 
5737 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5738 	num_regs = NUM_OF_SOB_IN_BLOCK;
5739 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5740 	if (rc) {
5741 		dev_err(hdev->dev, "failed resetting SM registers");
5742 		return -ENOMEM;
5743 	}
5744 
5745 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5746 	num_regs = NUM_OF_SOB_IN_BLOCK;
5747 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5748 	if (rc) {
5749 		dev_err(hdev->dev, "failed resetting SM registers");
5750 		return -ENOMEM;
5751 	}
5752 
5753 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5754 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5755 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5756 	if (rc) {
5757 		dev_err(hdev->dev, "failed resetting SM registers");
5758 		return -ENOMEM;
5759 	}
5760 
5761 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5762 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5763 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5764 	if (rc) {
5765 		dev_err(hdev->dev, "failed resetting SM registers");
5766 		return -ENOMEM;
5767 	}
5768 
5769 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5770 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5771 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5772 	if (rc) {
5773 		dev_err(hdev->dev, "failed resetting SM registers");
5774 		return -ENOMEM;
5775 	}
5776 
5777 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5778 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5779 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5780 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5781 	if (rc) {
5782 		dev_err(hdev->dev, "failed resetting SM registers");
5783 		return -ENOMEM;
5784 	}
5785 
5786 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5787 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5788 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5789 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5790 	if (rc) {
5791 		dev_err(hdev->dev, "failed resetting SM registers");
5792 		return -ENOMEM;
5793 	}
5794 
5795 	return 0;
5796 }
5797 
5798 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5799 {
5800 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5801 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5802 	int i;
5803 
5804 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5805 		u64 sob_addr = CFG_BASE +
5806 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5807 				(i * sob_delta);
5808 		u32 dma_offset = i * DMA_CORE_OFFSET;
5809 
5810 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5811 				lower_32_bits(sob_addr));
5812 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5813 				upper_32_bits(sob_addr));
5814 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5815 
5816 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5817 		 * modified by the user for SRAM reduction
5818 		 */
5819 		if (i > 1)
5820 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5821 								0x00000001);
5822 	}
5823 }
5824 
5825 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5826 {
5827 	u32 qman_offset;
5828 	int i;
5829 
5830 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5831 		qman_offset = i * DMA_QMAN_OFFSET;
5832 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5833 	}
5834 
5835 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5836 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5837 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5838 	}
5839 
5840 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5841 		qman_offset = i * TPC_QMAN_OFFSET;
5842 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5843 	}
5844 
5845 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5846 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5847 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5848 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5849 	}
5850 }
5851 
/*
 * gaudi_restore_user_registers() - restore all user-modifiable registers.
 * @hdev: habanalabs device structure.
 *
 * Return: 0 on success, negative errno if the SM register reset failed
 * (the DMA/QM restores are register writes that cannot fail).
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (!rc) {
		gaudi_restore_dma_registers(hdev);
		gaudi_restore_qm_registers(hdev);
	}

	return rc;
}
5865 
/*
 * gaudi_context_switch() - per-context switch hook.
 * @hdev: habanalabs device structure.
 * @asid: address-space ID of the incoming context (unused here).
 *
 * Gaudi requires no work on context switch, so this stub always succeeds.
 */
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
5870 
5871 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5872 {
5873 	u32 size = hdev->asic_prop.mmu_pgt_size +
5874 			hdev->asic_prop.mmu_cache_mng_size;
5875 	struct gaudi_device *gaudi = hdev->asic_specific;
5876 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5877 
5878 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5879 		return 0;
5880 
5881 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5882 }
5883 
/* Intentionally empty - Gaudi has no phase topology to restore */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
5888 
/*
 * gaudi_dma_core_transfer() - run a single transfer directly on a DMA core.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel to drive.
 * @addr: device source address.
 * @size_to_dma: number of bytes to transfer.
 * @dma_addr: host DMA (bus) address to write into.
 *
 * Programs the DMA core registers directly, bypassing the QMAN, commits the
 * transfer in linear mode and polls (up to 1 second) for the engine to go
 * idle. On engine error the cause register is logged and cleared.
 *
 * Return: 0 on success, -EIO on timeout or engine error.
 */
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	/* Program source, destination and size, then commit in linear mode */
	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	/* Wait for the engine's busy bit to clear (1 second max) */
	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
5935 
/*
 * gaudi_debugfs_read_dma() - read device memory into a host buffer via DMA.
 * @hdev: habanalabs device structure.
 * @addr: device address to read from.
 * @size: number of bytes to read.
 * @blob_addr: host buffer to fill.
 *
 * Picks an idle PCI DMA channel and drives its core directly (with the QMAN
 * CPs stopped) to copy the region into a 2MB bounce buffer, one chunk at a
 * time. Runs under the hw-queues lock for the full duration.
 *
 * Return: 0 on success, -EAGAIN if both PCI DMA channels are busy,
 * negative errno otherwise.
 */
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	u32 qm_glbl_sts0, qm_cgm_sts;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	/* 2MB bounce buffer - the transfer is chunked to this size */
	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* Prefer PCI DMA 1; fall back to PCI DMA 2 if it is busy */
	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
		      IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
			      IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	/* Stop the QMAN CPs so they don't drive the engine while we do,
	 * saving the current config to restore afterwards
	 */
	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	/* Transfer chunk by chunk through the bounce buffer */
	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Re-enable the QMAN CPs */
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);

	return rc;
}
6040 
6041 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6042 {
6043 	struct gaudi_device *gaudi = hdev->asic_specific;
6044 
6045 	if (hdev->reset_info.hard_reset_pending)
6046 		return U64_MAX;
6047 
6048 	return readq(hdev->pcie_bar[HBM_BAR_ID] +
6049 			(addr - gaudi->hbm_bar_cur_addr));
6050 }
6051 
6052 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6053 {
6054 	struct gaudi_device *gaudi = hdev->asic_specific;
6055 
6056 	if (hdev->reset_info.hard_reset_pending)
6057 		return;
6058 
6059 	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6060 			(addr - gaudi->hbm_bar_cur_addr));
6061 }
6062 
/*
 * gaudi_mmu_prepare_reg() - program an ASID into an engine user register.
 * @hdev: habanalabs device structure.
 * @reg: address of the register to update.
 * @asid: address-space ID to program (caller must ensure it fits the field).
 *
 * Clears the low 11 bits (the MMU-bypass and ASID fields) and then ORs in
 * the ASID, as a read-modify-write pair.
 */
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
6069 
/*
 * gaudi_mmu_prepare() - program the given ASID into all engine registers.
 * @hdev: habanalabs device structure.
 * @asid: address-space ID to program.
 *
 * Writes the ASID (and clears MMU bypass) into the non-secure properties /
 * AXI user registers of every DMA QMAN and core, every TPC, both MME
 * masters, each initialized NIC QMAN and the PSOC trace unit, so that all
 * of their transactions are translated by the MMU under this ASID.
 * Does nothing if the MMU was never initialized, and rejects an ASID that
 * does not fit the register field.
 */
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	/* DMA QMANs - 5 non-secure properties registers per queue */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* DMA cores */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	/* TPC QMANs plus the TPC config read/write AXI user registers */
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	/* MME master QMANs (MME0 and MME2) */
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* MME SBAB read users and accumulator write-back, all 4 MMEs */
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);

	/* NIC QMANs - only those whose engine was successfully initialized */
	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	/* PSOC trace unit */
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
}
6360 
/*
 * gaudi_send_job_on_qman0 - submit a patched CB on the DMA0 QMAN (stream 0)
 * and busy-wait for its completion fence.
 *
 * @hdev: habanalabs device structure
 * @job: job whose patched CB is submitted; the CB's last packet is a
 *       MSG_PROT that writes the fence value to host memory
 *
 * Return: 0 on success, -ENOMEM if the fence buffer cannot be allocated,
 * otherwise the error from queue submission or fence polling (-ETIMEDOUT
 * when the fence is never written).
 */
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
		struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	/* Palladium emulation is much slower - use an extended timeout */
	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	/* The fence packet occupies the last MSG_PROT slot of the CB */
	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	/* Temporarily mark the DMA channel as secured for this transfer */
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	/* Poll (1us units) until the MSG_PROT packet writes the fence value */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	/* Restore the DMA channel to non-secured mode */
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
6425 
6426 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6427 {
6428 	if (event_type >= GAUDI_EVENT_SIZE)
6429 		goto event_not_supported;
6430 
6431 	if (!gaudi_irq_map_table[event_type].valid)
6432 		goto event_not_supported;
6433 
6434 	snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6435 
6436 	return;
6437 
6438 event_not_supported:
6439 	snprintf(desc, size, "N/A");
6440 }
6441 
/*
 * gaudi_get_razwi_initiator_dma_name - resolve which DMA engine triggered a
 * RAZWI, given the initiator's router X/Y coordinates.
 *
 * Each DMA_IF router serves two DMA engines, so the per-engine CORE
 * ERR_CAUSE registers are consulted to disambiguate. When exactly one of the
 * two engines has the relevant HBW error bit set, that engine is reported;
 * otherwise both candidates are returned.
 *
 * @hdev: habanalabs device structure
 * @x_y: initiator X/Y coordinates as captured in the RAZWI ID register
 * @is_write: true to test the HBW write-error bit, false for the read bit
 * @engine_id_1: out - first (or only) candidate engine id
 * @engine_id_2: out - second candidate engine id (ambiguous case only)
 *
 * Return: a static human-readable initiator name.
 */
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
							bool is_write, u16 *engine_id_1,
							u16 *engine_id_2)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	/* Map router coordinates to the pair of DMA engines behind them */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	/* Snapshot both engines' error-cause registers */
	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	/* Report the engine(s) whose error-cause bit matches the access type */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			return "DMA0";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA2";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA0 or DMA2";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			return "DMA1";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA3";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA1 or DMA3";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			return "DMA4";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA6";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA4 or DMA6";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			return "DMA5";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA7";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA5 or DMA7";
		}
	}

unknown_initiator:
	return "unknown initiator";
}
6539 
/*
 * gaudi_get_razwi_initiator_name - decode the RAZWI initiator ID register
 * into a human-readable engine name.
 *
 * The initiator's router X/Y coordinates identify a location on the chip;
 * where several units share a router (e.g. TPC0/NIC0), the AXI ID field is
 * used to pick the actual initiator. DMA routers are further disambiguated
 * by gaudi_get_razwi_initiator_dma_name().
 *
 * @hdev: habanalabs device structure
 * @is_write: true to read the write-ID capture register, false for read
 * @engine_id_1: out - initiator engine id (left untouched for units with no
 *               engine id, e.g. PCI/CPU/PSOC)
 * @engine_id_2: out - second candidate engine id (DMA ambiguity only)
 *
 * Return: a static human-readable initiator name.
 */
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
							u16 *engine_id_1, u16 *engine_id_2)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	/* Split the captured ID into router coordinates and AXI ID fields */
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
			return "TPC0";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
			return "NIC0";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
			return "TPC3";
		}
		/* PCI, CPU or PSOC does not have engine id*/
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		/* DMA routers need the engines' ERR_CAUSE to disambiguate */
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
				engine_id_1, engine_id_2);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
			return "TPC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
			return "NIC1";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
			return "NIC2";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
			return "TPC7";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
			return "NIC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
			return "NIC5";
		}
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}
6656 
/*
 * gaudi_print_and_get_razwi_info - report captured RAZWI events.
 *
 * Checks the MMU write/read RAZWI valid bits; for each one set, prints the
 * resolved initiator name and clears the valid bit so the next RAZWI can be
 * captured.
 *
 * @hdev: habanalabs device structure
 * @engine_id_1: out - initiator engine id, when identifiable
 * @engine_id_2: out - second candidate engine id (DMA ambiguity only)
 * @is_read: out - set to true when a read RAZWI was captured
 * @is_write: out - set to true when a write RAZWI was captured
 */
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
						u16 *engine_id_2, bool *is_read, bool *is_write)
{

	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
		/* Clear the valid bit to re-arm the capture logic */
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
		*is_write = true;
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
		/* Clear the valid bit to re-arm the capture logic */
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
		*is_read = true;
	}
}
6677 
/*
 * gaudi_print_and_get_mmu_error_info - report captured MMU errors.
 *
 * Checks the MMU page-fault and access-error capture registers; for each
 * valid entry, reconstructs the faulting virtual address (bits 49:32 from
 * the capture register, bits 31:0 from the VA register), prints it and
 * clears the capture register. Page faults are also forwarded to
 * hl_handle_page_fault().
 *
 * @hdev: habanalabs device structure
 * @addr: out - last faulting virtual address seen (access error, if any,
 *        overwrites a page-fault address)
 * @event_mask: in/out - notifier event mask updated by the fault handler
 *
 * Does nothing if the MMU was never initialized on this device.
 */
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);

		/* Clear the capture register to re-arm fault capturing */
		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);

		/* Clear the capture register to re-arm error capturing */
		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
6709 
6710 /*
6711  *  +-------------------+------------------------------------------------------+
6712  *  | Configuration Reg |                     Description                      |
6713  *  |      Address      |                                                      |
6714  *  +-------------------+------------------------------------------------------+
6715  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6716  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6717  *  |                   |0xF34 memory wrappers 63:32                           |
6718  *  |                   |0xF38 memory wrappers 95:64                           |
6719  *  |                   |0xF3C memory wrappers 127:96                          |
6720  *  +-------------------+------------------------------------------------------+
6721  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6722  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6723  *  |                   |0xF44 memory wrappers 63:32                           |
6724  *  |                   |0xF48 memory wrappers 95:64                           |
6725  *  |                   |0xF4C memory wrappers 127:96                          |
6726  *  +-------------------+------------------------------------------------------+
6727  */
6728 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6729 		struct ecc_info_extract_params *params, u64 *ecc_address,
6730 		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6731 {
6732 	u32 i, num_mem_regs, reg, err_bit;
6733 	u64 err_addr, err_word = 0;
6734 
6735 	num_mem_regs = params->num_memories / 32 +
6736 			((params->num_memories % 32) ? 1 : 0);
6737 
6738 	if (params->block_address >= CFG_BASE)
6739 		params->block_address -= CFG_BASE;
6740 
6741 	if (params->derr)
6742 		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6743 	else
6744 		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6745 
6746 	/* Set invalid wrapper index */
6747 	*memory_wrapper_idx = 0xFF;
6748 
6749 	/* Iterate through memory wrappers, a single bit must be set */
6750 	for (i = 0 ; i < num_mem_regs ; i++) {
6751 		err_addr += i * 4;
6752 		err_word = RREG32(err_addr);
6753 		if (err_word) {
6754 			err_bit = __ffs(err_word);
6755 			*memory_wrapper_idx = err_bit + (32 * i);
6756 			break;
6757 		}
6758 	}
6759 
6760 	if (*memory_wrapper_idx == 0xFF) {
6761 		dev_err(hdev->dev, "ECC error information cannot be found\n");
6762 		return -EINVAL;
6763 	}
6764 
6765 	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6766 			*memory_wrapper_idx);
6767 
6768 	*ecc_address =
6769 		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6770 	*ecc_syndrom =
6771 		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6772 
6773 	/* Clear error indication */
6774 	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6775 	if (params->derr)
6776 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6777 	else
6778 		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6779 
6780 	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6781 
6782 	return 0;
6783 }
6784 
6785 /*
6786  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6787  *
6788  * @idx: the current pi/ci value
6789  * @q_len: the queue length (power of 2)
6790  *
6791  * @return the cyclically decremented index
6792  */
6793 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6794 {
6795 	u32 mask = q_len - 1;
6796 
6797 	/*
6798 	 * modular decrement is equivalent to adding (queue_size -1)
6799 	 * later we take LSBs to make sure the value is in the
6800 	 * range [0, queue_len - 1]
6801 	 */
6802 	return (idx + q_len - 1) & mask;
6803 }
6804 
/**
 * gaudi_handle_sw_config_stream_data - print SW config stream data
 *
 * @hdev: pointer to the habanalabs device structure
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 *
 * Reads the stream's CQ pointer (hi/lo) and transfer-size registers and
 * prints them. When an undefined-opcode event is set in @event_mask, the
 * values are also recorded in the captured-error info.
 */
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base, u64 event_mask)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	/* Stride between consecutive streams' CQ_PTR_LO registers */
	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
						stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
							stream, cq_ptr, size);

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
		hdev->captured_err_info.undef_opcode.cq_size = size;
		hdev->captured_err_info.undef_opcode.stream_id = stream;
	}
}
6839 
/**
 * gaudi_handle_last_pqes_on_err - print last PQEs on error
 *
 * @hdev: pointer to the habanalabs device structure
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @stream: the QMAN's stream
 * @qman_base: base address of QMAN registers block
 * @event_mask: mask of the last events occurred
 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
 *
 * Walks backwards from the stream's current consumer index and prints up to
 * PQ_FETCHER_CACHE_SIZE previously fetched PQEs. When an undefined-opcode
 * event is set in @event_mask, the PQE addresses are also recorded in the
 * captured-error info.
 */
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						u64 event_mask,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	/* Address of this stream's PQ consumer-index register */
	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
						stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
					q->int_queue_len : HL_QUEUE_LENGTH;

	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	ci = RREG32(pq_ci);

	/* we should start printing from ci - 1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
	memset(addr, 0, sizeof(addr));

	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry- break */
		if (!len)
			break;

		addr[i] = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
							stream, ci, addr[i], len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
		u32 arr_idx = undef_opcode->cb_addr_streams_len;

		/* First stream recorded for this event - stamp time and engine */
		if (arr_idx == 0) {
			undef_opcode->timestamp = ktime_get();
			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
		}

		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
		undef_opcode->cb_addr_streams_len++;
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}
6916 
6917 /**
6918  * handle_qman_data_on_err - extract QMAN data on error
6919  *
6920  * @hdev: pointer to the habanalabs device structure
6921  * @qid_base: first QID of the QMAN (out of 4 streams)
6922  * @stream: the QMAN's stream
6923  * @qman_base: base address of QMAN registers block
6924  * @event_mask: mask of the last events occurred
6925  *
6926  * This function attempt to exatract as much data as possible on QMAN error.
6927  * On upper CP print the SW config stream data and last 8 PQEs.
6928  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6929  */
6930 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6931 				   u32 stream, u64 qman_base, u64 event_mask)
6932 {
6933 	u32 i;
6934 
6935 	if (stream != QMAN_STREAMS) {
6936 		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6937 			qman_base, event_mask, true);
6938 		return;
6939 	}
6940 
6941 	/* handle Lower-CP */
6942 	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6943 
6944 	for (i = 0; i < QMAN_STREAMS; i++)
6945 		gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6946 			qman_base, event_mask, false);
6947 }
6948 
/*
 * gaudi_handle_qman_err_generic - decode and report a QMAN's error causes.
 *
 * Scans the GLBL_STS1 register of each of the 4 streams plus the lower CP,
 * prints every set error-cause bit and detects undefined-opcode errors.
 * When not in stop-on-error mode the (write-1-clear) status bits are
 * cleared; otherwise the QMAN state is dumped for debug. Finally, any
 * arbiter errors are reported.
 *
 * @hdev: habanalabs device structure
 * @qm_name: QMAN name used in the log messages
 * @qman_base: base address of the QMAN registers block
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @event_mask: in/out - notifier event mask, updated on undefined opcode
 */
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					  const char *qm_name,
					  u64 qman_base,
					  u32 qid_base,
					  u64 *event_mask)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		/* Index QMAN_STREAMS (the 5th register) is the lower CP */
		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}
		/* check for undefined opcode */
		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
				hdev->captured_err_info.undef_opcode.write_enable) {
			memset(&hdev->captured_err_info.undef_opcode, 0,
						sizeof(hdev->captured_err_info.undef_opcode));

			/* Disarm further captures until user-space re-enables */
			hdev->captured_err_info.undef_opcode.write_enable = false;
			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
		}

		/* Write 1 clear errors */
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
7015 
7016 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
7017 		struct hl_eq_sm_sei_data *sei_data)
7018 {
7019 	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7020 
7021 	/* Flip the bits as the enum is ordered in the opposite way */
7022 	index = (index ^ 0x3) & 0x3;
7023 
7024 	switch (sei_data->sei_cause) {
7025 	case SM_SEI_SO_OVERFLOW:
7026 		dev_err_ratelimited(hdev->dev,
7027 			"%s SEI Error: SOB Group %u overflow/underflow",
7028 			gaudi_sync_manager_names[index],
7029 			le32_to_cpu(sei_data->sei_log));
7030 		break;
7031 	case SM_SEI_LBW_4B_UNALIGNED:
7032 		dev_err_ratelimited(hdev->dev,
7033 			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7034 			gaudi_sync_manager_names[index],
7035 			le32_to_cpu(sei_data->sei_log));
7036 		break;
7037 	case SM_SEI_AXI_RESPONSE_ERR:
7038 		dev_err_ratelimited(hdev->dev,
7039 			"%s SEI Error: AXI ID %u response error",
7040 			gaudi_sync_manager_names[index],
7041 			le32_to_cpu(sei_data->sei_log));
7042 		break;
7043 	default:
7044 		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7045 				le32_to_cpu(sei_data->sei_log));
7046 		break;
7047 	}
7048 }
7049 
/*
 * gaudi_handle_ecc_event - report an ECC error event.
 *
 * When the firmware owns security, or the event belongs to a block the
 * driver does not probe directly, the address/syndrome/wrapper index are
 * taken from the firmware-supplied EQ entry. Otherwise they are extracted
 * from the block's ECC registers via gaudi_extract_ecc_info().
 *
 * @hdev: habanalabs device structure
 * @event_type: ECC event id (selects block base address and severity)
 * @ecc_data: firmware-supplied ECC data, used on the firmware path
 */
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	/* Driver cannot touch the registers when firmware security is on */
	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		/* MME events are spaced 4 apart in the event enum */
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
7146 
/*
 * gaudi_handle_qman_err - map a QMAN error event to its queue-id base,
 * register base and display name, then decode the error causes via
 * gaudi_handle_qman_err_generic().
 *
 * @hdev: habanalabs device structure
 * @event_type: QMAN error event id
 * @event_mask: in/out - notifier event mask forwarded to the decoder
 *
 * Unknown event types are silently ignored.
 */
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		/* Only MME 0 and 2 have QMANs (1 and 3 are slaves) */
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
}
7237 
7238 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7239 					bool check_razwi, u64 *event_mask)
7240 {
7241 	bool is_read = false, is_write = false;
7242 	u16 engine_id[2], num_of_razwi_eng = 0;
7243 	char desc[64] = "";
7244 	u64 razwi_addr = 0;
7245 	u8 razwi_flags = 0;
7246 
7247 	/*
7248 	 * Init engine id by default as not valid and only if razwi initiated from engine with
7249 	 * engine id it will get valid value.
7250 	 */
7251 	engine_id[0] = HL_RAZWI_NA_ENG_ID;
7252 	engine_id[1] = HL_RAZWI_NA_ENG_ID;
7253 
7254 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
7255 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7256 		event_type, desc);
7257 
7258 	if (check_razwi) {
7259 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7260 						&is_write);
7261 		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7262 
7263 		if (is_read)
7264 			razwi_flags |= HL_RAZWI_READ;
7265 		if (is_write)
7266 			razwi_flags |= HL_RAZWI_WRITE;
7267 
7268 		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7269 			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7270 				num_of_razwi_eng = 2;
7271 			else
7272 				num_of_razwi_eng = 1;
7273 		}
7274 
7275 		if (razwi_flags)
7276 			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7277 					razwi_flags, event_mask);
7278 	}
7279 }
7280 
7281 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7282 					struct cpucp_pkt_sync_err *sync_err)
7283 {
7284 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7285 
7286 	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7287 		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7288 }
7289 
7290 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7291 					struct hl_eq_fw_alive *fw_alive)
7292 {
7293 	dev_err(hdev->dev,
7294 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7295 		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7296 		le32_to_cpu(fw_alive->process_id),
7297 		le32_to_cpu(fw_alive->thread_id),
7298 		le64_to_cpu(fw_alive->uptime_seconds));
7299 }
7300 
7301 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7302 						void *data)
7303 {
7304 	char desc[64] = "", *type;
7305 	struct eq_nic_sei_event *eq_nic_sei = data;
7306 	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7307 
7308 	switch (eq_nic_sei->axi_error_cause) {
7309 	case RXB:
7310 		type = "RXB";
7311 		break;
7312 	case RXE:
7313 		type = "RXE";
7314 		break;
7315 	case TXS:
7316 		type = "TXS";
7317 		break;
7318 	case TXE:
7319 		type = "TXE";
7320 		break;
7321 	case QPC_RESP:
7322 		type = "QPC_RESP";
7323 		break;
7324 	case NON_AXI_ERR:
7325 		type = "NON_AXI_ERR";
7326 		break;
7327 	case TMR:
7328 		type = "TMR";
7329 		break;
7330 	default:
7331 		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7332 			eq_nic_sei->axi_error_cause);
7333 		type = "N/A";
7334 		break;
7335 	}
7336 
7337 	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7338 			eq_nic_sei->id);
7339 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7340 		event_type, desc);
7341 }
7342 
/*
 * gaudi_compute_reset_late_init - late-init hook for a compute (soft) reset.
 *
 * Return: always -EPERM, since Gaudi only supports hard-reset.
 */
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	/* GAUDI doesn't support any reset except hard-reset */
	return -EPERM;
}
7348 
/*
 * gaudi_hbm_read_interrupts - gather and clear ECC/parity interrupt info for
 * one HBM device.
 *
 * @hdev: habanalabs device structure
 * @device: HBM device index
 * @hbm_ecc_data: FW-provided ECC data, used when FW reports HBM ECC
 *
 * Three sources are tried in order: FW-provided ECC data (when the FW
 * advertises CPU_BOOT_DEV_STS0_HBM_ECC_EN), nothing (when FW security blocks
 * direct MC register access), or direct reads of the HBM memory-controller
 * registers, which are also cleared afterwards.
 *
 * Return: 0 when info came from FW or no error was latched, -EIO when an
 * error indication was found in the MC registers.
 */
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int rc = 0;

	/* Preferred path: FW collected the ECC info for us */
	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, wr_par, rd_par, ca_par, serr, derr);
		dev_err(hdev->dev,
			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
			device, ch, hbm_ecc_data->first_addr, type,
			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
			hbm_ecc_data->dec_cnt);
		return 0;
	}

	/* Without FW data, direct MC access is needed - impossible if secured */
	if (hdev->asic_prop.fw_security_enabled) {
		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
		return 0;
	}

	/* Fallback: read interrupt/ECC status registers per channel pair */
	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		/* Even pseudo-channel of this pair */
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Odd pseudo-channel of this pair */
		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	/* MC SRAM single-error (SERR) indications */
	val  = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	/* MC SRAM double-error (DERR) indications */
	val  = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return rc;
}
7463 
7464 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7465 {
7466 	switch (hbm_event_type) {
7467 	case GAUDI_EVENT_HBM0_SPI_0:
7468 	case GAUDI_EVENT_HBM0_SPI_1:
7469 		return 0;
7470 	case GAUDI_EVENT_HBM1_SPI_0:
7471 	case GAUDI_EVENT_HBM1_SPI_1:
7472 		return 1;
7473 	case GAUDI_EVENT_HBM2_SPI_0:
7474 	case GAUDI_EVENT_HBM2_SPI_1:
7475 		return 2;
7476 	case GAUDI_EVENT_HBM3_SPI_0:
7477 	case GAUDI_EVENT_HBM3_SPI_1:
7478 		return 3;
7479 	default:
7480 		break;
7481 	}
7482 
7483 	/* Should never happen */
7484 	return 0;
7485 }
7486 
7487 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7488 					char *interrupt_name)
7489 {
7490 	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7491 	bool soft_reset_required = false;
7492 
7493 	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7494 				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7495 
7496 	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7497 		if (tpc_interrupts_cause & BIT(i)) {
7498 			dev_err_ratelimited(hdev->dev,
7499 					"TPC%d_%s interrupt cause: %s\n",
7500 					tpc_id, interrupt_name,
7501 					gaudi_tpc_interrupts_cause[i]);
7502 			/* If this is QM error, we need to soft-reset */
7503 			if (i == 15)
7504 				soft_reset_required = true;
7505 		}
7506 
7507 	/* Clear interrupts */
7508 	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7509 
7510 	return soft_reset_required;
7511 }
7512 
7513 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7514 {
7515 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7516 }
7517 
7518 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7519 {
7520 	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7521 }
7522 
/*
 * gaudi_print_clk_change_info - record and log a clock-throttling state
 * change reported by FW.
 *
 * Updates the current/aggregated throttle reasons and the start/end
 * timestamps for the power and thermal throttle types under the
 * clk_throttling lock. Thermal events also raise
 * HL_NOTIFIER_EVENT_USER_ENGINE_ERR in @event_mask.
 */
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	/* Power throttling started: open a new timestamp window */
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	/* Power throttling ended: close the window */
	case GAUDI_EVENT_FIX_POWER_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;

	/* Thermal throttling started: also notify user space */
	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	/* Thermal throttling ended: also notify user space */
	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
7572 
/*
 * gaudi_handle_eqe - main dispatcher for FW event-queue entries.
 *
 * Decodes the event type from the EQ entry header, updates per-event
 * statistics, routes the event to the matching print/handling helper and
 * decides whether a device reset is required. Notification bits collected
 * in event_mask are sent to user space at the end - either directly, or
 * through hl_device_cond_reset() when a reset is performed.
 */
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_info_fw_err_info fw_err_info;
	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0, flags = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required, reset_direct = false;
	u8 cause;
	int rc;

	/* Guard against a corrupted or unknown event id coming from FW */
	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	/* Uncorrectable (double) ECC errors - fatal, hard-reset the device */
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		goto reset_device;

	/* Fatal infrastructure errors - hard-reset without razwi inspection */
	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_0 interrupts are fatal - collect info and reset */
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_1 interrupts are recoverable - collect info and unmask */
	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		/* In TPC DEC event, notify on TPC assertion. While there isn't
		 * a specific event for assertion yet, the FW generates TPC DEC event.
		 * The SW upper layer will inspect an internal mapped area to indicate
		 * if the event is a TPC Assertion or a "real" TPC DEC.
		 */
		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	/* TPC kernel errors - reset only if the TPC reported a QM error */
	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	/* Correctable (single) ECC errors - log, handle and unmask; no reset */
	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	/* QMAN errors - print QM-specific info on top of the generic report */
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_qman_err(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		goto reset_device;

	/* Bus-monitor events are informational only */
	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	/* DMA-IF SEI - dump state for debug, then unmask */
	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	/* NIC status events are intentionally ignored */
	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
		fw_err_info.event_id = event_type;
		fw_err_info.event_mask = &event_mask;
		hl_handle_fw_err(hdev, &fw_err_info);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	reset_required = true;

	/* With secured FW, resets must bypass the FW request flow (unless the
	 * event demands a direct reset)
	 */
	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;

		/* notify on device unavailable while the reset triggered by fw */
		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
	} else if (hdev->hard_reset_on_fw_events) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	} else {
		reset_required = false;
	}

	if (reset_required) {
		/* escalate general hw errors to critical/fatal error */
		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
			hl_handle_critical_hw_err(hdev, event_type, &event_mask);

		hl_device_cond_reset(hdev, flags, event_mask);
	} else {
		hl_fw_unmask_irq(hdev, event_type);
		/* Notification on occurred event needs to be sent although reset is not executed */
		if (event_mask)
			hl_notifier_event_send_all(hdev, event_mask);
	}
}
7901 
7902 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7903 {
7904 	struct gaudi_device *gaudi = hdev->asic_specific;
7905 
7906 	if (aggregate) {
7907 		*size = (u32) sizeof(gaudi->events_stat_aggregate);
7908 		return gaudi->events_stat_aggregate;
7909 	}
7910 
7911 	*size = (u32) sizeof(gaudi->events_stat);
7912 	return gaudi->events_stat;
7913 }
7914 
/*
 * gaudi_mmu_invalidate_cache - invalidate the whole MMU STLB cache.
 *
 * Skipped (returns 0) when the MMU was never initialized or a hard reset
 * is pending. Kicks off an L0+L1 invalidation via the STLB registers and
 * polls until the HW reports completion or the timeout expires.
 *
 * Return: 0 on success, negative errno on poll timeout.
 */
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	/* Palladium emulation is much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	/* Wait for the invalidation state machine to go idle (status == 0) */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}
7947 
/*
 * gaudi_mmu_invalidate_cache_range - range variant of MMU cache invalidation.
 *
 * The asid/va/size arguments are ignored on this ASIC.
 */
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
7957 
/*
 * gaudi_mmu_update_asid_hop0_addr - program the hop-0 page-table physical
 * address for a given ASID in the MMU.
 *
 * Writes the ASID and the split hop-0 address, triggers the update via the
 * MMU_BUSY bit and polls until the HW clears it.
 *
 * Return: 0 on success, negative errno on poll timeout.
 */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	/* Palladium emulation is much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* hop0 address is split across two registers (bits 43:12 and 49:44) */
	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	/* Wait for the MMU to consume the new configuration */
	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
7989 
7990 static int gaudi_send_heartbeat(struct hl_device *hdev)
7991 {
7992 	struct gaudi_device *gaudi = hdev->asic_specific;
7993 
7994 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7995 		return 0;
7996 
7997 	return hl_fw_send_heartbeat(hdev);
7998 }
7999 
8000 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8001 {
8002 	struct gaudi_device *gaudi = hdev->asic_specific;
8003 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8004 	int rc;
8005 
8006 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8007 		return 0;
8008 
8009 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8010 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8011 					mmCPU_BOOT_ERR1);
8012 	if (rc)
8013 		return rc;
8014 
8015 	if (!strlen(prop->cpucp_info.card_name))
8016 		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8017 				CARD_NAME_MAX_LEN);
8018 
8019 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8020 
8021 	set_default_power_values(hdev);
8022 
8023 	return 0;
8024 }
8025 
/*
 * gaudi_is_device_idle - check whether all compute/DMA/NIC engines are idle.
 *
 * @mask_arr: optional bitmap; busy engines get their GAUDI_ENGINE_ID_* bit set
 * @mask_len: length of @mask_arr (unused here beyond the caller contract)
 * @e: optional buffer that receives a human-readable per-engine idle report
 *
 * Return: true when every checked engine is idle, false otherwise.
 */
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	/* DMA engines: idle requires both the QM and the DMA core to be idle */
	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	/* TPC engines: idle requires both the QM and the TPC core to be idle */
	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e,
				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	/* NIC engines: two ports per macro, each with its own QM; check only
	 * ports whose HW capability bit was initialized
	 */
	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}
8162 
/* Take the per-device HW queues spinlock; paired with gaudi_hw_queues_unlock() */
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}
8170 
/* Release the per-device HW queues spinlock taken by gaudi_hw_queues_lock() */
static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
8178 
/* Return the PCI device ID of the underlying PCI device */
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
8183 
8184 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8185 				size_t max_size)
8186 {
8187 	struct gaudi_device *gaudi = hdev->asic_specific;
8188 
8189 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8190 		return 0;
8191 
8192 	return hl_fw_get_eeprom_data(hdev, data, max_size);
8193 }
8194 
8195 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8196 {
8197 	struct gaudi_device *gaudi = hdev->asic_specific;
8198 
8199 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8200 		return 0;
8201 
8202 	return hl_fw_get_monitor_dump(hdev, data);
8203 }
8204 
8205 /*
8206  * this function should be used only during initialization and/or after reset,
8207  * when there are no active users.
8208  */
/*
 * gaudi_run_tpc_kernel() - load and synchronously execute a kernel on a TPC.
 * @hdev: habanalabs device structure.
 * @tpc_kernel: device address of the kernel binary to execute.
 * @tpc_id: index of the TPC engine to run on.
 *
 * Programs the kernel/icache/LUT base addresses, invalidates and prefetches
 * the icache, triggers execution and then polls until the vector pipe is
 * empty and no work-queue entries are in flight.
 *
 * Return: 0 on success, -EIO on any poll timeout.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
{
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	/* per-TPC register stride, derived from two consecutive TPC blocks */
	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	/* Palladium emulation runs far slower than silicon */
	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	/* invalidate the icache and prefetch 64KB of the kernel */
	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		return -EIO;
	}

	/* kick off kernel execution */
	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		return -EIO;
	}

	/* wait until all in-flight work-queue entries have retired */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
8304 
/*
 * gaudi_internal_cb_pool_init() - create the per-context internal CB pool.
 * @hdev: habanalabs device structure.
 * @ctx: context the pool is created for.
 *
 * Allocates a DMA-coherent host buffer, wraps it with a gen_pool allocator
 * whose minimum chunk fits one collective-wait CB, reserves a host VA block
 * and maps the buffer there through the device MMU. No-op when the MMU is
 * not initialized. Error paths unwind in reverse order via gotos.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	/* smallest allocation: one collective CB (5 msg-shorts + a fence) */
	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);

	/* map the pool at the reserved VA and flush the MMU cache */
	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
8384 
/*
 * gaudi_internal_cb_pool_fini() - tear down the per-context internal CB pool.
 *
 * Reverses gaudi_internal_cb_pool_init(): unmap the pool from the device
 * MMU, unreserve the host VA block, invalidate the MMU cache, destroy the
 * gen_pool and free the DMA-coherent buffer. No-op when the MMU was never
 * initialized (the init path allocated nothing in that case).
 */
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
8406 
8407 static int gaudi_ctx_init(struct hl_ctx *ctx)
8408 {
8409 	int rc;
8410 
8411 	if (ctx->asid == HL_KERNEL_ASID_ID)
8412 		return 0;
8413 
8414 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8415 	if (rc)
8416 		return rc;
8417 
8418 	rc = gaudi_restore_user_registers(ctx->hdev);
8419 	if (rc)
8420 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8421 
8422 	return rc;
8423 }
8424 
8425 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8426 {
8427 	if (ctx->asid == HL_KERNEL_ASID_ID)
8428 		return;
8429 
8430 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8431 }
8432 
/* No ASIC-specific work is required before a CS is scheduled on Gaudi */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8437 
/* Map a completion-queue index to its assigned HW queue ID */
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
8442 
/* Size of a signal CB: one SOB msg-short plus two msg-prot packets */
static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) +
			sizeof(struct packet_msg_prot) * 2;
}
8448 
/*
 * Size of a wait CB: four msg-shorts (three monitor-setup + one arm),
 * a fence packet and two msg-prot packets.
 */
static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
{
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence) +
			sizeof(struct packet_msg_prot) * 2;
}
8455 
/* CFG-relative address of sync object @sob_id (4 bytes per SOB register) */
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
8460 
8461 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8462 				u32 size, bool eb)
8463 {
8464 	struct hl_cb *cb = (struct hl_cb *) data;
8465 	struct packet_msg_short *pkt;
8466 	u32 value, ctl, pkt_size = sizeof(*pkt);
8467 
8468 	pkt = cb->kernel_address + size;
8469 	memset(pkt, 0, pkt_size);
8470 
8471 	/* Inc by 1, Mode ADD */
8472 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8473 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8474 
8475 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8476 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8477 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8478 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8479 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8480 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8481 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8482 
8483 	pkt->value = cpu_to_le32(value);
8484 	pkt->ctl = cpu_to_le32(ctl);
8485 
8486 	return size + pkt_size;
8487 }
8488 
8489 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8490 					u16 addr)
8491 {
8492 	u32 ctl, pkt_size = sizeof(*pkt);
8493 
8494 	memset(pkt, 0, pkt_size);
8495 
8496 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8497 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
8498 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8499 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8500 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8501 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8502 
8503 	pkt->value = cpu_to_le32(value);
8504 	pkt->ctl = cpu_to_le32(ctl);
8505 
8506 	return pkt_size;
8507 }
8508 
8509 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8510 		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8511 		u16 sob_val, u16 mon_id)
8512 {
8513 	u64 monitor_base;
8514 	u32 ctl, value, pkt_size = sizeof(*pkt);
8515 	u16 msg_addr_offset;
8516 	u8 mask;
8517 
8518 	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8519 		dev_err(hdev->dev,
8520 			"sob_base %u (mask %#x) is not valid\n",
8521 			sob_base, sob_mask);
8522 		return 0;
8523 	}
8524 
8525 	/*
8526 	 * monitor_base should be the content of the base0 address registers,
8527 	 * so it will be added to the msg short offsets
8528 	 */
8529 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8530 
8531 	msg_addr_offset =
8532 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8533 				monitor_base;
8534 
8535 	memset(pkt, 0, pkt_size);
8536 
8537 	/* Monitor config packet: bind the monitor to a sync object */
8538 	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8539 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8540 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8541 			0); /* GREATER OR EQUAL*/
8542 	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8543 
8544 	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8545 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8546 	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8547 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8548 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8549 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8550 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8551 
8552 	pkt->value = cpu_to_le32(value);
8553 	pkt->ctl = cpu_to_le32(ctl);
8554 
8555 	return pkt_size;
8556 }
8557 
8558 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8559 {
8560 	u32 ctl, cfg, pkt_size = sizeof(*pkt);
8561 
8562 	memset(pkt, 0, pkt_size);
8563 
8564 	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8565 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8566 	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8567 
8568 	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8569 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8570 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8571 	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8572 
8573 	pkt->cfg = cpu_to_le32(cfg);
8574 	pkt->ctl = cpu_to_le32(ctl);
8575 
8576 	return pkt_size;
8577 }
8578 
/*
 * gaudi_get_fence_addr() - CFG-space address of the CP_FENCE2_RDATA
 * register for a given queue.
 * @hdev: habanalabs device structure.
 * @queue_id: the queue the wait CB will run on.
 * @addr: filled with CFG_BASE + register offset on success.
 *
 * Only queues that support the driver's wait flow are listed:
 * DMA 0/1/5 streams, TPC7 streams and all NIC streams.
 *
 * Return: 0 on success, -EINVAL for an unsupported queue.
 */
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	/* NIC stream 0: two engines share a macro, so split the NIC index
	 * into a macro offset (nic_index >> 1) and engine offset (bit 0)
	 */
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	/* NIC stream 1 */
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	/* NIC stream 2 */
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	/* NIC stream 3 */
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
8700 
8701 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8702 {
8703 	u64 monitor_base;
8704 	u32 size = 0;
8705 	u16 msg_addr_offset;
8706 
8707 	/*
8708 	 * monitor_base should be the content of the base0 address registers,
8709 	 * so it will be added to the msg short offsets
8710 	 */
8711 	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8712 
8713 	/* First monitor config packet: low address of the sync */
8714 	msg_addr_offset =
8715 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8716 				monitor_base;
8717 
8718 	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8719 					msg_addr_offset);
8720 
8721 	/* Second monitor config packet: high address of the sync */
8722 	msg_addr_offset =
8723 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8724 				monitor_base;
8725 
8726 	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8727 					msg_addr_offset);
8728 
8729 	/*
8730 	 * Third monitor config packet: the payload, i.e. what to write when the
8731 	 * sync triggers
8732 	 */
8733 	msg_addr_offset =
8734 		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8735 				monitor_base;
8736 
8737 	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8738 
8739 	return size;
8740 }
8741 
8742 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8743 				struct hl_gen_wait_properties *prop)
8744 {
8745 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8746 	void *buf = cb->kernel_address;
8747 	u64 fence_addr = 0;
8748 	u32 size = prop->size;
8749 
8750 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8751 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8752 				prop->q_idx);
8753 		return 0;
8754 	}
8755 
8756 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8757 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8758 			prop->sob_mask, prop->sob_val, prop->mon_id);
8759 	size += gaudi_add_fence_pkt(buf + size);
8760 
8761 	return size;
8762 }
8763 
8764 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8765 {
8766 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8767 
8768 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8769 		hw_sob->sob_id);
8770 
8771 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8772 			hw_sob->sob_id * 4, 0);
8773 
8774 	kref_init(&hw_sob->kref);
8775 }
8776 
8777 static u64 gaudi_get_device_time(struct hl_device *hdev)
8778 {
8779 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8780 
8781 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8782 }
8783 
/* Mapping HW blocks to user-space is not supported on Gaudi */
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}
8789 
/* Mapping HW blocks to user-space is not supported on Gaudi */
static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
8796 
8797 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8798 {
8799 	struct cpu_dyn_regs *dyn_regs =
8800 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8801 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8802 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8803 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8804 
8805 	WREG32(irq_handler_offset,
8806 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8807 }
8808 
/* Acking MMU page-fault/access-error capture is not supported on Gaudi */
static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}
8813 
8814 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8815 {
8816 	switch (pll_idx) {
8817 	case HL_GAUDI_CPU_PLL: return CPU_PLL;
8818 	case HL_GAUDI_PCI_PLL: return PCI_PLL;
8819 	case HL_GAUDI_NIC_PLL: return NIC_PLL;
8820 	case HL_GAUDI_DMA_PLL: return DMA_PLL;
8821 	case HL_GAUDI_MESH_PLL: return MESH_PLL;
8822 	case HL_GAUDI_MME_PLL: return MME_PLL;
8823 	case HL_GAUDI_TPC_PLL: return TPC_PLL;
8824 	case HL_GAUDI_IF_PLL: return IF_PLL;
8825 	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8826 	case HL_GAUDI_HBM_PLL: return HBM_PLL;
8827 	default: return -EINVAL;
8828 	}
8829 }
8830 
8831 static int gaudi_add_sync_to_engine_map_entry(
8832 	struct hl_sync_to_engine_map *map, u32 reg_value,
8833 	enum hl_sync_engine_type engine_type, u32 engine_id)
8834 {
8835 	struct hl_sync_to_engine_map_entry *entry;
8836 
8837 	/* Reg value represents a partial address of sync object,
8838 	 * it is used as unique identifier. For this we need to
8839 	 * clear the cutoff cfg base bits from the value.
8840 	 */
8841 	if (reg_value == 0 || reg_value == 0xffffffff)
8842 		return 0;
8843 	reg_value -= lower_32_bits(CFG_BASE);
8844 
8845 	/* create a new hash entry */
8846 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8847 	if (!entry)
8848 		return -ENOMEM;
8849 	entry->engine_type = engine_type;
8850 	entry->engine_id = engine_id;
8851 	entry->sync_id = reg_value;
8852 	hash_add(map->tb, &entry->node, reg_value);
8853 
8854 	return 0;
8855 }
8856 
8857 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8858 				struct hl_sync_to_engine_map *map)
8859 {
8860 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8861 	int i, j, rc;
8862 	u32 reg_value;
8863 
8864 	/* Iterate over TPC engines */
8865 	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8866 
8867 		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8868 					sds->props[SP_NEXT_TPC] * i);
8869 
8870 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8871 							ENGINE_TPC, i);
8872 		if (rc)
8873 			goto free_sync_to_engine_map;
8874 	}
8875 
8876 	/* Iterate over MME engines */
8877 	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8878 		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8879 
8880 			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8881 						sds->props[SP_NEXT_MME] * i +
8882 						j * sizeof(u32));
8883 
8884 			rc = gaudi_add_sync_to_engine_map_entry(
8885 				map, reg_value, ENGINE_MME,
8886 				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8887 			if (rc)
8888 				goto free_sync_to_engine_map;
8889 		}
8890 	}
8891 
8892 	/* Iterate over DMA engines */
8893 	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8894 		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8895 					sds->props[SP_DMA_QUEUES_OFFSET] * i);
8896 		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8897 							ENGINE_DMA, i);
8898 		if (rc)
8899 			goto free_sync_to_engine_map;
8900 	}
8901 
8902 	return 0;
8903 
8904 free_sync_to_engine_map:
8905 	hl_state_dump_free_sync_to_engine_map(map);
8906 
8907 	return rc;
8908 }
8909 
/* Return non-zero if the dumped monitor's status word has its valid bit set */
static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}
8916 
/*
 * gaudi_fill_sobs_from_mon() - render the SOB IDs a monitor watches.
 * @sobs: output string buffer of MONITOR_SOB_STRING_SIZE bytes.
 * @mon: monitor state as dumped from HW.
 *
 * The monitor arm data carries a group ID and a mask; cleared mask bits
 * select the monitored objects within the group. The loop stops early
 * once another entry might not fit in the buffer.
 */
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	/* worst-case bytes one snprintf below may append */
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			/* comma-separate all but the first entry */
			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
8944 
/*
 * gaudi_print_single_monitor() - pretty-print one armed monitor.
 * @buf/@size/@offset: hl_snprintf_resize() output state.
 * @hdev: habanalabs device structure.
 * @mon: monitor state as dumped from HW.
 *
 * Decodes the arm data (group, mask, target value), the payload address
 * and value, and the pending bits, and appends a human-readable line.
 *
 * Return: whatever hl_snprintf_resize() returns (0 on success).
 */
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	/* scratch buffers for binary renderings of the mask/pending fields */
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
8982 
8983 
8984 static int gaudi_print_fences_single_engine(
8985 	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8986 	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8987 	size_t *size, size_t *offset)
8988 {
8989 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8990 	int rc = -ENOMEM, i;
8991 	u32 *statuses, *fences;
8992 
8993 	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8994 			sizeof(*statuses), GFP_KERNEL);
8995 	if (!statuses)
8996 		goto out;
8997 
8998 	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8999 				sds->props[SP_ENGINE_NUM_OF_QUEUES],
9000 			 sizeof(*fences), GFP_KERNEL);
9001 	if (!fences)
9002 		goto free_status;
9003 
9004 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
9005 		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
9006 
9007 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
9008 				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
9009 		fences[i] = RREG32(base_offset + i * sizeof(u32));
9010 
9011 	/* The actual print */
9012 	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
9013 		u32 fence_id;
9014 		u64 fence_cnt, fence_rdata;
9015 		const char *engine_name;
9016 
9017 		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
9018 			statuses[i]))
9019 			continue;
9020 
9021 		fence_id =
9022 			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9023 		fence_cnt = base_offset + CFG_BASE +
9024 			sizeof(u32) *
9025 			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9026 		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9027 				sds->props[SP_FENCE0_RDATA_OFFSET];
9028 		engine_name = hl_sync_engine_to_string(engine_type);
9029 
9030 		rc = hl_snprintf_resize(
9031 			buf, size, offset,
9032 			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9033 			engine_name, engine_id,
9034 			i, fence_id,
9035 			fence_cnt, engine_name, engine_id, fence_id, i,
9036 			fence_rdata, engine_name, engine_id, fence_id, i,
9037 			fences[fence_id],
9038 			statuses[i]);
9039 		if (rc)
9040 			goto free_fences;
9041 	}
9042 
9043 	rc = 0;
9044 
9045 free_fences:
9046 	kfree(fences);
9047 free_status:
9048 	kfree(statuses);
9049 out:
9050 	return rc;
9051 }
9052 
9053 
/* Gaudi callbacks for the common state-dump (debugfs) infrastructure */
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
9060 
/*
 * gaudi_state_dump_init() - register Gaudi specifics with the common
 * state-dump engine: SOB and monitor ID-to-name hash tables, the
 * property table, sync manager names and the callback vtable.
 */
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	/* the "namager" field-name typo comes from the common struct */
	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
9082 
/* Expose the static array of stream-master queue IDs to common code */
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}
9087 
/* No dynamic DRAM properties to set on Gaudi - nothing to do */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9092 
/* No binning masks to set on Gaudi - nothing to do */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9097 
/* RAZWI check is a no-op on Gaudi */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9101 
9102 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9103 {
9104 	struct hl_device *hdev = dev_get_drvdata(dev);
9105 	struct cpucp_info *cpucp_info;
9106 
9107 	cpucp_info = &hdev->asic_prop.cpucp_info;
9108 
9109 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9110 }
9111 
static DEVICE_ATTR_RO(infineon_ver);

/* Attributes attached to the VRM sysfs group (NULL-terminated) */
static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};
9118 
9119 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9120 					struct attribute_group *dev_vrm_attr_grp)
9121 {
9122 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9123 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9124 }
9125 
/*
 * gaudi_send_device_activity() - no-op stub; device open/close activity
 * notification to firmware is not implemented for Gaudi. Always returns 0.
 */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9130 
/*
 * ASIC function table for Gaudi. The common habanalabs code dispatches all
 * device-specific operations through this table; entries set to NULL denote
 * operations not implemented for this ASIC.
 */
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	/* generic SG-table helpers are shared with other ASICs */
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,	/* not implemented for Gaudi */
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,	/* not implemented for Gaudi */
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,	/* no decoder blocks on Gaudi */
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};
9227 
9228 /**
9229  * gaudi_set_asic_funcs - set GAUDI function pointers
9230  *
9231  * @hdev: pointer to hl_device structure
9232  *
9233  */
9234 void gaudi_set_asic_funcs(struct hl_device *hdev)
9235 {
9236 	hdev->asic_funcs = &gaudi_funcs;
9237 }
9238