/* xref: /linux/drivers/accel/habanalabs/gaudi/gaudi.c (revision 189f164e573e18d9f8876dbd3ad8fcbe11f93037) */
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2022 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15 
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22 
23 /*
24  * Gaudi security scheme:
25  *
26  * 1. Host is protected by:
27  *        - Range registers
28  *        - MMU
29  *
30  * 2. DDR is protected by:
31  *        - Range registers (protect the first 512MB)
32  *
33  * 3. Configuration is protected by:
34  *        - Range registers
35  *        - Protection bits
36  *
37  * MMU is always enabled.
38  *
 * QMAN DMA channels 0,1 (PCI DMA):
40  *     - DMA is not secured.
41  *     - PQ and CQ are secured.
42  *     - CP is secured: The driver needs to parse CB but WREG should be allowed
43  *                      because of TDMA (tensor DMA). Hence, WREG is always not
44  *                      secured.
45  *
46  * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47  * channel 0 to be secured, execute the DMA and change it back to not secured.
48  * Currently, the driver doesn't use the DMA while there are compute jobs
49  * running.
50  *
51  * The current use cases for the driver to use the DMA are:
52  *     - Clear SRAM on context switch (happens on context switch when device is
53  *       idle)
54  *     - MMU page tables area clear (happens on init)
55  *
56  * QMAN DMA 2-7, TPC, MME, NIC:
57  * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58  * CQ, CP and the engine are not secured
59  *
60  */
61 
62 #define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"
65 
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69 
70 #define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */
71 
72 #define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */
76 
77 #define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */
86 
87 #define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9
88 
89 #define GAUDI_MAX_STRING_LEN		20
90 
91 #define GAUDI_CB_POOL_CB_CNT		512
92 #define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */
93 
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3
95 
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE	20
97 
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE	16
99 
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3
101 
102 #define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */
103 
104 #define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */
105 
106 #define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")
107 
108 #define MONITOR_SOB_STRING_SIZE		256
109 
/*
 * Queues that serve as stream masters - the four streams of each of the
 * two PCI DMA channels (DMA 0 and DMA 1).
 */
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};
120 
/*
 * Map each logical DMA channel role (2 PCI channels, 6 HBM channels) to
 * the physical DMA engine ID that fulfills it.
 */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
131 
/*
 * Completion queue index -> h/w queue assignment. One completion queue
 * per PCI DMA stream (DMA 0/1, four streams each).
 */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
142 
/* Size, in bytes, of each QMAN packet type, indexed by packet ID */
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
159 
validate_packet_id(enum packet_id id)160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 	switch (id) {
163 	case PACKET_WREG_32:
164 	case PACKET_WREG_BULK:
165 	case PACKET_MSG_LONG:
166 	case PACKET_MSG_SHORT:
167 	case PACKET_CP_DMA:
168 	case PACKET_REPEAT:
169 	case PACKET_MSG_PROT:
170 	case PACKET_FENCE:
171 	case PACKET_LIN_DMA:
172 	case PACKET_NOP:
173 	case PACKET_STOP:
174 	case PACKET_ARB_POINT:
175 	case PACKET_WAIT:
176 	case PACKET_LOAD_AND_EXE:
177 		return true;
178 	default:
179 		return false;
180 	}
181 }
182 
/* Human-readable names for the TPC interrupt causes */
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};
206 
/* Human-readable names for the QMAN error causes */
static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};
226 
/* Human-readable names for the QMAN arbiter error causes */
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
233 
/*
 * Type of each h/w queue, indexed by GAUDI_QUEUE_ID_*:
 * EXT - external queue (PCI DMA channels 0/1), driver-managed,
 * CPU - the single driver <-> device-CPU message queue,
 * INT - internal queue (HBM DMA, MME, TPC, NIC engines).
 */
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
349 
/* id -> name mapping for well-known sync objects */
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
379 
/* id -> name mapping for well-known sync-manager monitors */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
393 
/*
 * Register addresses, offsets and counts consumed by the common
 * state-dump code, indexed by the SP_* property enum.
 */
static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
429 
/*
 * Map each queue ID to the engine that owns it. The CPU queue has no
 * owning engine and maps to the out-of-range value GAUDI_ENGINE_ID_SIZE.
 */
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};
461 
/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL /* sentinel - marks end of list */
};
472 
/* Parameters describing which h/w block to query for ECC information */
struct ecc_info_extract_params {
	u64 block_address;	/* base address of the h/w block */
	u32 num_memories;	/* number of memories in the block */
	bool derr;		/* NOTE(review): presumably double-bit (vs single-bit) error - confirm at call sites */
};
478 
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480 								u64 phys_addr);
481 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482 					struct hl_cs_job *job);
483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484 					u32 size, u64 val);
485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486 					u32 num_regs, u32 val);
487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488 				u32 tpc_id);
489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490 static int gaudi_cpucp_info_get(struct hl_device *hdev);
491 static void gaudi_disable_clock_gating(struct hl_device *hdev);
492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494 				u32 size, bool eb);
495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496 				struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
get_collective_mode(struct hl_device * hdev,u32 queue_id)498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 		return HL_COLLECTIVE_MASTER;
502 
503 	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 		return HL_COLLECTIVE_SLAVE;
506 
507 	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 		return HL_COLLECTIVE_SLAVE;
510 
511 	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 		return HL_COLLECTIVE_SLAVE;
514 
515 	return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517 
set_default_power_values(struct hl_device * hdev)518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 	struct asic_fixed_properties *prop = &hdev->asic_prop;
521 
522 	if (hdev->card_type == cpucp_card_type_pmc) {
523 		prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524 
525 		if (prop->fw_security_enabled)
526 			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 		else
528 			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 	} else {
530 		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 	}
533 }
534 
/**
 * gaudi_set_fixed_properties() - fill the fixed (compile-time) ASIC properties.
 * @hdev: habanalabs device structure.
 *
 * Allocates the h/w queue properties array and initializes all fixed
 * properties of the Gaudi ASIC: queue types, DRAM/SRAM ranges, MMU layout,
 * power defaults, sync-stream SOB/monitor partitioning, etc. Called from
 * gaudi_early_init() before any h/w access.
 *
 * Return: 0 on success, -ENOMEM if the queue properties allocation fails.
 */
static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kzalloc_objs(struct hw_queue_properties,
					     prop->max_queues);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	/* Per-queue properties, derived from the static gaudi_queue_type map.
	 * Only external queues support sync streams; only the CPU queue is
	 * driver-only; internal queues get user-allocated CBs.
	 */
	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
						get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream are reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	/* DRAM (HBM) and SRAM address ranges */
	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	/* PCI MMU (PMMU): 5-hop v1.1 layout, 4KB pages, lower half of the
	 * host VA space
	 */
	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.pgt_size = prop->mmu_pgt_size;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	/* First user-available SOB/monitor follow the driver-reserved
	 * sync-stream ranges computed above
	 */
	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}
708 
gaudi_pci_bars_map(struct hl_device * hdev)709 static int gaudi_pci_bars_map(struct hl_device *hdev)
710 {
711 	static const char * const name[] = {"SRAM", "CFG", "HBM"};
712 	bool is_wc[3] = {false, false, true};
713 	int rc;
714 
715 	rc = hl_pci_bars_map(hdev, name, is_wc);
716 	if (rc)
717 		return rc;
718 
719 	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
720 			(CFG_BASE - SPI_FLASH_BASE_ADDR);
721 
722 	return 0;
723 }
724 
/*
 * gaudi_set_hbm_bar_base() - re-target the HBM BAR to a new device address.
 * @hdev: habanalabs device structure.
 * @addr: device address the HBM BAR should point to.
 *
 * Return: the previous BAR base address (or @addr when it was already
 * current), U64_MAX if the iATU is owned by firmware or re-programming
 * the inbound region failed.
 */
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region region;
	u64 prev_addr = addr;

	/* Nothing to do if the BAR already points at the requested address */
	if (gaudi && gaudi->hbm_bar_cur_addr == addr)
		return prev_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	region.mode = PCI_BAR_MATCH_MODE;
	region.bar = HBM_BAR_ID;
	region.addr = addr;
	if (hl_pci_set_inbound_region(hdev, 2, &region))
		return U64_MAX;

	if (gaudi) {
		prev_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return prev_addr;
}
753 
gaudi_init_iatu(struct hl_device * hdev)754 static int gaudi_init_iatu(struct hl_device *hdev)
755 {
756 	struct hl_inbound_pci_region inbound_region;
757 	struct hl_outbound_pci_region outbound_region;
758 	int rc;
759 
760 	if (hdev->asic_prop.iatu_done_by_fw)
761 		return 0;
762 
763 	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
764 	inbound_region.mode = PCI_BAR_MATCH_MODE;
765 	inbound_region.bar = SRAM_BAR_ID;
766 	inbound_region.addr = SRAM_BASE_ADDR;
767 	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
768 	if (rc)
769 		goto done;
770 
771 	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
772 	inbound_region.mode = PCI_BAR_MATCH_MODE;
773 	inbound_region.bar = CFG_BAR_ID;
774 	inbound_region.addr = SPI_FLASH_BASE_ADDR;
775 	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
776 	if (rc)
777 		goto done;
778 
779 	/* Inbound Region 2 - Bar 4 - Point to HBM */
780 	inbound_region.mode = PCI_BAR_MATCH_MODE;
781 	inbound_region.bar = HBM_BAR_ID;
782 	inbound_region.addr = DRAM_PHYS_BASE;
783 	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
784 	if (rc)
785 		goto done;
786 
787 	/* Outbound Region 0 - Point to Host */
788 	outbound_region.addr = HOST_PHYS_BASE;
789 	outbound_region.size = HOST_PHYS_SIZE;
790 	rc = hl_pci_set_outbound_region(hdev, &outbound_region);
791 
792 done:
793 	return rc;
794 }
795 
gaudi_get_hw_state(struct hl_device * hdev)796 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
797 {
798 	return RREG32(mmHW_STATE);
799 }
800 
/*
 * gaudi_early_init - first-stage device initialization
 * @hdev: pointer to hl_device structure
 *
 * Sets the fixed ASIC properties, validates that the PCI BAR sizes match a
 * real Gaudi device, determines whether the iATU was configured by FW or must
 * be programmed by the driver, and brings up PCI access. If preboot reports a
 * dirty H/W state, the device is reset before proceeding.
 *
 * Return: 0 on success, negative errno otherwise. On failure all partially
 * acquired resources (PCI, queue properties) are released.
 */
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	/* ELBI is accessible - query the FW boot status directly */
	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	/* A dirty H/W state mandates a reset before any further init step */
	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
892 
gaudi_early_fini(struct hl_device * hdev)893 static int gaudi_early_fini(struct hl_device *hdev)
894 {
895 	kfree(hdev->asic_prop.hw_queues_props);
896 	hl_pci_fini(hdev);
897 
898 	return 0;
899 }
900 
901 /**
902  * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
903  *
904  * @hdev: pointer to hl_device structure
905  *
906  */
gaudi_fetch_psoc_frequency(struct hl_device * hdev)907 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
908 {
909 	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
910 	struct asic_fixed_properties *prop = &hdev->asic_prop;
911 	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
912 	int rc;
913 
914 	if ((hdev->fw_components & FW_TYPE_LINUX) &&
915 			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
916 		struct gaudi_device *gaudi = hdev->asic_specific;
917 
918 		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
919 			return 0;
920 
921 		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
922 
923 		if (rc)
924 			return rc;
925 
926 		freq = pll_freq_arr[2];
927 	} else {
928 		/* Backward compatibility */
929 		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
930 		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
931 		nr = RREG32(mmPSOC_CPU_PLL_NR);
932 		nf = RREG32(mmPSOC_CPU_PLL_NF);
933 		od = RREG32(mmPSOC_CPU_PLL_OD);
934 
935 		if (div_sel == DIV_SEL_REF_CLK ||
936 				div_sel == DIV_SEL_DIVIDED_REF) {
937 			if (div_sel == DIV_SEL_REF_CLK)
938 				freq = PLL_REF_CLK;
939 			else
940 				freq = PLL_REF_CLK / (div_fctr + 1);
941 		} else if (div_sel == DIV_SEL_PLL_CLK ||
942 			div_sel == DIV_SEL_DIVIDED_PLL) {
943 			pll_clk = PLL_REF_CLK * (nf + 1) /
944 					((nr + 1) * (od + 1));
945 			if (div_sel == DIV_SEL_PLL_CLK)
946 				freq = pll_clk;
947 			else
948 				freq = pll_clk / (div_fctr + 1);
949 		} else {
950 			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
951 			freq = 0;
952 		}
953 	}
954 
955 	prop->psoc_timestamp_frequency = freq;
956 	prop->psoc_pci_pll_nr = nr;
957 	prop->psoc_pci_pll_nf = nf;
958 	prop->psoc_pci_pll_od = od;
959 	prop->psoc_pci_pll_div_factor = div_fctr;
960 
961 	return 0;
962 }
963 
/*
 * _gaudi_init_tpc_mem - DMA the TPC kernel image to SRAM and run it on all TPCs
 * @hdev: pointer to hl_device structure
 * @tpc_kernel_src_addr: DMA address of the TPC kernel image on the host
 * @tpc_kernel_size: size of the TPC kernel image in bytes
 *
 * Builds a single LIN_DMA packet in a kernel CB, submits it on QMAN0 to copy
 * the kernel into SRAM, then executes it on every TPC engine.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	/* The CB holds exactly one LIN_DMA packet */
	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	/* LIN_DMA opcode with linear mode, register barrier and message barrier */
	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* The CB is both the user CB and the patched CB - no parsing needed */
	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	/* Kernel image is in SRAM - execute it on every TPC engine */
	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	/* Drop our reference and remove the CB from the kernel memory manager */
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
1040 
1041 /*
1042  * gaudi_init_tpc_mem() - Initialize TPC memories.
1043  * @hdev: Pointer to hl_device structure.
1044  *
1045  * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1046  *
1047  * Return: 0 for success, negative value for error.
1048  */
gaudi_init_tpc_mem(struct hl_device * hdev)1049 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1050 {
1051 	const struct firmware *fw;
1052 	size_t fw_size;
1053 	void *cpu_addr;
1054 	dma_addr_t dma_handle;
1055 	int rc, count = 5;
1056 
1057 again:
1058 	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1059 	if (rc == -EINTR && count-- > 0) {
1060 		msleep(50);
1061 		goto again;
1062 	}
1063 
1064 	if (rc) {
1065 		dev_err(hdev->dev, "Failed to load firmware file %s\n",
1066 				GAUDI_TPC_FW_FILE);
1067 		goto out;
1068 	}
1069 
1070 	fw_size = fw->size;
1071 	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1072 	if (!cpu_addr) {
1073 		dev_err(hdev->dev,
1074 			"Failed to allocate %zu of dma memory for TPC kernel\n",
1075 			fw_size);
1076 		rc = -ENOMEM;
1077 		goto out;
1078 	}
1079 
1080 	memcpy(cpu_addr, fw->data, fw_size);
1081 
1082 	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1083 
1084 	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1085 
1086 out:
1087 	release_firmware(fw);
1088 	return rc;
1089 }
1090 
/*
 * gaudi_collective_map_sobs - assign the stream's current SOB group to its
 * collective queues
 * @hdev: pointer to hl_device structure
 * @stream: QMAN stream index
 */
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
	u32 sob_group, first_sob, qid, i;
	struct hl_hw_queue *q;

	/* Resolve the SOB group currently assigned to this stream */
	sob_group = stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
	first_sob = cprop->hw_sob_group[sob_group].base_sob_id;

	/* Give each NIC slave queue of this stream its own SOB from the group */
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		qid = GAUDI_QUEUE_ID_NIC_0_0 + stream + (4 * i);
		q = &hdev->kernel_queues[qid];
		q->sync_stream_prop.collective_sob_id = first_sob + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine need to participate in the reduction process
	 */
	q = &hdev->kernel_queues[GAUDI_QUEUE_ID_DMA_5_0 + stream];
	q->sync_stream_prop.collective_sob_id =
			first_sob + NIC_NUMBER_OF_ENGINES;

	q = &hdev->kernel_queues[GAUDI_QUEUE_ID_TPC_7_0 + stream];
	q->sync_stream_prop.collective_sob_id =
			first_sob + NIC_NUMBER_OF_ENGINES;
}
1122 
gaudi_sob_group_hw_reset(struct kref * ref)1123 static void gaudi_sob_group_hw_reset(struct kref *ref)
1124 {
1125 	struct gaudi_hw_sob_group *hw_sob_group =
1126 		container_of(ref, struct gaudi_hw_sob_group, kref);
1127 	struct hl_device *hdev = hw_sob_group->hdev;
1128 	int i;
1129 
1130 	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1131 		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1132 			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1133 
1134 	kref_init(&hw_sob_group->kref);
1135 }
1136 
/*
 * gaudi_sob_group_reset_error - kref release callback that must never fire
 * @ref: embedded kref of the gaudi_hw_sob_group
 *
 * Used on kref_put() paths where the caller has just taken a reference, so
 * reaching zero here indicates a driver refcounting bug - report it loudly.
 */
static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}
1147 
/*
 * gaudi_collective_mstr_sob_mask_set - build the master monitor SOB masks
 * @gaudi: pointer to the gaudi device structure
 *
 * Sets a mask bit per enabled NIC engine plus one bit for the reduction
 * engine, split across mask words of HL_MAX_SOBS_PER_MONITOR bits each.
 */
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	/* One mask bit for every NIC engine that is actually enabled */
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	/* NOTE: 'i' is deliberately used after the loop - it equals
	 * NIC_NUMBER_OF_ENGINES, the SOB slot right after the NIC SOBs
	 * (see gaudi_collective_map_sobs).
	 */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}
1165 
gaudi_collective_init(struct hl_device * hdev)1166 static int gaudi_collective_init(struct hl_device *hdev)
1167 {
1168 	u32 i, sob_id, reserved_sobs_per_group;
1169 	struct gaudi_collective_properties *prop;
1170 	struct gaudi_device *gaudi;
1171 
1172 	gaudi = hdev->asic_specific;
1173 	prop = &gaudi->collective_props;
1174 	sob_id = hdev->asic_prop.collective_first_sob;
1175 
1176 	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1177 	reserved_sobs_per_group =
1178 		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1179 
1180 	/* Init SOB groups */
1181 	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1182 		prop->hw_sob_group[i].hdev = hdev;
1183 		prop->hw_sob_group[i].base_sob_id = sob_id;
1184 		sob_id += reserved_sobs_per_group;
1185 		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1186 	}
1187 
1188 	for (i = 0 ; i < QMAN_STREAMS; i++) {
1189 		prop->next_sob_group_val[i] = 1;
1190 		prop->curr_sob_group_idx[i] = 0;
1191 		gaudi_collective_map_sobs(hdev, i);
1192 	}
1193 
1194 	gaudi_collective_mstr_sob_mask_set(gaudi);
1195 
1196 	return 0;
1197 }
1198 
/*
 * gaudi_reset_sob_group - drop a reference to a SOB group
 * @hdev: pointer to hl_device structure
 * @sob_group: index of the SOB group to release
 */
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_hw_sob_group *grp =
		&gaudi->collective_props.hw_sob_group[sob_group];

	/* The last reference triggers a H/W reset of the whole group */
	kref_put(&grp->kref, gaudi_sob_group_hw_reset);
}
1207 
/*
 * gaudi_collective_master_init_job - fill the master queue's wait CB
 * @hdev: pointer to hl_device structure
 * @job: the collective master job (its patched CB is written here)
 * @stream: QMAN stream index
 * @sob_group_offset: index of the SOB group used by this collective wait
 *
 * Generates two wait CBs into the job's patched CB: the first monitor covers
 * the first HL_MAX_SOBS_PER_MONITOR SOBs (NICs 0-7), the second covers the
 * remaining NICs plus the reduction engine.
 */
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	/* Remember which queue owns this group for later reset handling */
	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	/* First wait CB: first SOB range with the first master monitor */
	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	/* Second wait CB: next SOB range with the second master monitor.
	 * wait_prop.data/sob_val/q_idx deliberately keep their values from
	 * the first pass.
	 */
	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}
1258 
/*
 * gaudi_collective_slave_init_job - fill a slave queue's wait+signal CB
 * @hdev: pointer to hl_device structure
 * @job: the collective slave job (its user CB is written here)
 * @cs_cmpl: completion object carrying the SOB id/value to wait on
 *
 * Generates a wait CB on the user's signal SOB followed by a signal CB on the
 * slave's collective SOB, which the master queue monitors.
 */
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle store earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	/* Signal the collective SOB the master is monitoring */
	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}
1306 
/*
 * gaudi_collective_wait_init_cs - initialize a collective wait CS
 * @cs: the command submission to initialize
 *
 * Propagates the signal SOB info to the wait CS completion object, fills the
 * master/slave jobs' CBs and advances the stream's SOB group bookkeeping,
 * handling the SOB value wraparound.
 *
 * Return: 0 on success, -EINVAL if the signal CS already completed.
 */
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depends on each
		 * job wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	/* Fill the CB of every job: master waits, slaves wait+signal */
	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
1413 
/*
 * gaudi_get_patched_cb_extra_size - size added to a user CB when patching
 * @user_cb_size: size of the user CB in bytes
 *
 * A patched CB carries two extra MSG_PROT packets. If those packets would
 * cross the cache line the user CB ends in, padding up to the cache line
 * boundary is added as well.
 */
static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 extra = sizeof(struct packet_msg_prot) * 2;
	u32 aligned_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);

	/* Pad to the cache line boundary when the MSG_PROTs spill past it */
	if (user_cb_size + extra > aligned_end)
		extra += aligned_end - user_cb_size;

	return extra;
}
1426 
/*
 * gaudi_collective_wait_create_job - allocate and queue one collective job
 * @hdev: pointer to hl_device structure
 * @ctx: context of the submitting user
 * @cs: the command submission this job belongs to
 * @mode: HL_COLLECTIVE_MASTER or HL_COLLECTIVE_SLAVE
 * @queue_id: queue the job will run on
 * @wait_queue_id: the collective master wait queue id
 * @encaps_signal_offset: wait offset for encapsulated signals
 *
 * Allocates a job and a kernel CB sized for the master or slave packet
 * sequence, wires them together and appends the job to the CS job list.
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on allocation failure.
 */
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since its guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
1522 
/*
 * gaudi_collective_wait_create_jobs - create all jobs of a collective wait CS
 * @hdev: pointer to hl_device structure
 * @ctx: context of the submitting user
 * @cs: the command submission to populate
 * @wait_queue_id: the collective master wait queue id
 * @collective_engine_id: reduction engine (DMA5 or TPC7)
 * @encaps_signal_offset: wait offset for encapsulated signals
 *
 * Return: 0 on success, negative errno on invalid input or job creation
 * failure.
 */
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (hw_queue_prop->collective_mode != HL_COLLECTIVE_MASTER) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	collective_queue = (collective_engine_id == GAUDI_ENGINE_ID_DMA_5) ?
			GAUDI_QUEUE_ID_DMA_5_0 + stream :
			GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		enum hl_collective_mode mode;

		if (i == 0) {
			mode = HL_COLLECTIVE_MASTER;
			queue_id = wait_queue_id;
		} else if (nic_idx < NIC_NUMBER_OF_ENGINES) {
			bool nic_enabled = gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx);

			queue_id = nic_queue;
			nic_queue += 4;
			nic_idx++;

			/* Don't create jobs for NICs that are disabled */
			if (!nic_enabled)
				continue;

			mode = HL_COLLECTIVE_SLAVE;
		} else {
			mode = HL_COLLECTIVE_SLAVE;
			queue_id = collective_queue;
		}

		rc = gaudi_collective_wait_create_job(hdev, ctx, cs, mode,
				queue_id, wait_queue_id, encaps_signal_offset);
		if (rc)
			return rc;
	}

	return rc;
}
1607 
/*
 * gaudi_late_init - second-stage initialization, after FW/queues are up
 * @hdev: pointer to hl_device structure
 *
 * Fetches CPUCP info, disables unused NIC QMANs on PCI cards, enables PCI
 * access from FW, scrubs device memory, and initializes PSOC frequency, MMU
 * page tables, TPC memories and the collective infrastructure.
 *
 * Return: 0 on success, negative errno otherwise. On failure after PCI access
 * was granted, it is revoked again.
 */
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	/* From this point on, failures must revoke FW's PCI access */
	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc)
		return rc;

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}
1687 
/*
 * gaudi_late_fini - undo gaudi_late_init
 * @hdev: pointer to hl_device structure
 */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1692 
/*
 * gaudi_alloc_cpu_accessible_dma_mem - allocate memory the device CPU can use
 * @hdev: pointer to hl_device structure
 *
 * Allocates DMA-coherent memory whose start and end share the same upper
 * address bits, retrying up to GAUDI_ALLOC_CPU_MEM_RETRY_CNT times; all
 * rejected attempts are freed before returning.
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on failure.
 */
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */

	/* Keep each unsuitable allocation live until a suitable one is found,
	 * so the allocator cannot hand the same range back again.
	 */
	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	/* Free every rejected attempt; entry 'i' (the kept one, if any) is
	 * intentionally excluded.
	 */
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
1745 
gaudi_free_internal_qmans_pq_mem(struct hl_device * hdev)1746 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1747 {
1748 	struct gaudi_device *gaudi = hdev->asic_specific;
1749 	struct gaudi_internal_qman_info *q;
1750 	u32 i;
1751 
1752 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1753 		q = &gaudi->internal_qmans[i];
1754 		if (!q->pq_kernel_addr)
1755 			continue;
1756 		hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1757 	}
1758 }
1759 
gaudi_alloc_internal_qmans_pq_mem(struct hl_device * hdev)1760 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1761 {
1762 	struct gaudi_device *gaudi = hdev->asic_specific;
1763 	struct gaudi_internal_qman_info *q;
1764 	int rc, i;
1765 
1766 	for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1767 		if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1768 			continue;
1769 
1770 		q = &gaudi->internal_qmans[i];
1771 
1772 		switch (i) {
1773 		case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1774 			q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1775 			break;
1776 		case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1777 			q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1778 			break;
1779 		case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1780 			q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1781 			break;
1782 		case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1783 			q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1784 			break;
1785 		default:
1786 			dev_err(hdev->dev, "Bad internal queue index %d", i);
1787 			rc = -EINVAL;
1788 			goto free_internal_qmans_pq_mem;
1789 		}
1790 
1791 		q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1792 								GFP_KERNEL | __GFP_ZERO);
1793 		if (!q->pq_kernel_addr) {
1794 			rc = -ENOMEM;
1795 			goto free_internal_qmans_pq_mem;
1796 		}
1797 	}
1798 
1799 	return 0;
1800 
1801 free_internal_qmans_pq_mem:
1802 	gaudi_free_internal_qmans_pq_mem(hdev);
1803 	return rc;
1804 }
1805 
gaudi_set_pci_memory_regions(struct hl_device * hdev)1806 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1807 {
1808 	struct asic_fixed_properties *prop = &hdev->asic_prop;
1809 	struct pci_mem_region *region;
1810 
1811 	/* CFG */
1812 	region = &hdev->pci_mem_region[PCI_REGION_CFG];
1813 	region->region_base = CFG_BASE;
1814 	region->region_size = CFG_SIZE;
1815 	region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1816 	region->bar_size = CFG_BAR_SIZE;
1817 	region->bar_id = CFG_BAR_ID;
1818 	region->used = 1;
1819 
1820 	/* SRAM */
1821 	region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1822 	region->region_base = SRAM_BASE_ADDR;
1823 	region->region_size = SRAM_SIZE;
1824 	region->offset_in_bar = 0;
1825 	region->bar_size = SRAM_BAR_SIZE;
1826 	region->bar_id = SRAM_BAR_ID;
1827 	region->used = 1;
1828 
1829 	/* DRAM */
1830 	region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1831 	region->region_base = DRAM_PHYS_BASE;
1832 	region->region_size = hdev->asic_prop.dram_size;
1833 	region->offset_in_bar = 0;
1834 	region->bar_size = prop->dram_pci_bar_size;
1835 	region->bar_id = HBM_BAR_ID;
1836 	region->used = 1;
1837 
1838 	/* SP SRAM */
1839 	region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1840 	region->region_base = PSOC_SCRATCHPAD_ADDR;
1841 	region->region_size = PSOC_SCRATCHPAD_SIZE;
1842 	region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1843 	region->bar_size = CFG_BAR_SIZE;
1844 	region->bar_id = CFG_BAR_ID;
1845 	region->used = 1;
1846 }
1847 
/*
 * gaudi_sw_init() - Allocate and initialize the driver's SW-side resources.
 *
 * Allocates the gaudi_device structure, builds the FW event-id lookup table,
 * creates the small-allocation DMA pool, the CPU-accessible DMA chunk and its
 * gen_pool wrapper, and the internal QMAN PQ buffers. Resources are released
 * in reverse order via the goto cleanup chain on any failure.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc_obj(*gaudi);
	if (!gaudi)
		return -ENOMEM;

	/* Build the event table from the valid entries of the IRQ map */
	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	/* gen_pool with 32-byte granularity over the CPU accessible chunk */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	/* Restore the original PCI address before freeing the chunk */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
1941 
gaudi_sw_fini(struct hl_device * hdev)1942 static int gaudi_sw_fini(struct hl_device *hdev)
1943 {
1944 	struct gaudi_device *gaudi = hdev->asic_specific;
1945 
1946 	gaudi_free_internal_qmans_pq_mem(hdev);
1947 
1948 	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1949 
1950 	if (!hdev->asic_prop.fw_security_enabled)
1951 		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1952 					hdev->cpu_pci_msb_addr);
1953 
1954 	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1955 					hdev->cpu_accessible_dma_address);
1956 
1957 	dma_pool_destroy(hdev->dma_pool);
1958 
1959 	kfree(gaudi);
1960 
1961 	return 0;
1962 }
1963 
gaudi_irq_handler_single(int irq,void * arg)1964 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1965 {
1966 	struct hl_device *hdev = arg;
1967 	int i;
1968 
1969 	if (hdev->disabled)
1970 		return IRQ_HANDLED;
1971 
1972 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1973 		hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1974 
1975 	hl_irq_handler_eq(irq, &hdev->event_queue);
1976 
1977 	return IRQ_HANDLED;
1978 }
1979 
1980 /*
1981  * For backward compatibility, new MSI interrupts should be set after the
1982  * existing CPU and NIC interrupts.
1983  */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int msi_vec;

	/* The CPU event queue is hard-wired to a fixed MSI index */
	if (cpu_eq && nr != GAUDI_EVENT_QUEUE_MSI_IDX)
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	/*
	 * Vectors above the CPU EQ index are shifted past the NIC range to
	 * keep backward compatibility of the existing CPU and NIC interrupts.
	 */
	if (cpu_eq || nr < GAUDI_EVENT_QUEUE_MSI_IDX)
		msi_vec = nr;
	else
		msi_vec = nr + NIC_NUMBER_OF_ENGINES + 1;

	return pci_irq_vector(hdev->pdev, msi_vec);
}
1998 
gaudi_enable_msi_single(struct hl_device * hdev)1999 static int gaudi_enable_msi_single(struct hl_device *hdev)
2000 {
2001 	int rc, irq;
2002 
2003 	dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2004 
2005 	irq = gaudi_pci_irq_vector(hdev, 0, false);
2006 	rc = request_irq(irq, gaudi_irq_handler_single, 0,
2007 			"gaudi single msi", hdev);
2008 	if (rc)
2009 		dev_err(hdev->dev,
2010 			"Failed to request single MSI IRQ\n");
2011 
2012 	return rc;
2013 }
2014 
gaudi_enable_msi(struct hl_device * hdev)2015 static int gaudi_enable_msi(struct hl_device *hdev)
2016 {
2017 	struct gaudi_device *gaudi = hdev->asic_specific;
2018 	int rc;
2019 
2020 	if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2021 		return 0;
2022 
2023 	rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2024 	if (rc < 0) {
2025 		dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2026 		return rc;
2027 	}
2028 
2029 	rc = gaudi_enable_msi_single(hdev);
2030 	if (rc)
2031 		goto free_pci_irq_vectors;
2032 
2033 	gaudi->hw_cap_initialized |= HW_CAP_MSI;
2034 
2035 	return 0;
2036 
2037 free_pci_irq_vectors:
2038 	pci_free_irq_vectors(hdev->pdev);
2039 	return rc;
2040 }
2041 
gaudi_sync_irqs(struct hl_device * hdev)2042 static void gaudi_sync_irqs(struct hl_device *hdev)
2043 {
2044 	struct gaudi_device *gaudi = hdev->asic_specific;
2045 
2046 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2047 		return;
2048 
2049 	/* Wait for all pending IRQs to be finished */
2050 	synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2051 }
2052 
gaudi_disable_msi(struct hl_device * hdev)2053 static void gaudi_disable_msi(struct hl_device *hdev)
2054 {
2055 	struct gaudi_device *gaudi = hdev->asic_specific;
2056 
2057 	if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2058 		return;
2059 
2060 	gaudi_sync_irqs(hdev);
2061 	free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2062 	pci_free_irq_vectors(hdev->pdev);
2063 
2064 	gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2065 }
2066 
/*
 * Enable the SRAM scrambler in all NIF/SIF routers and DMA_IF down channels.
 *
 * Skipped when FW security is enabled, when the FW reports it already enabled
 * the scrambler (CPU_BOOT_DEV_STS0_SRAM_SCR_EN), or when this function has
 * already run (HW_CAP_SRAM_SCRAMBLER).
 */
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	/* North interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* South interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* DMA interface down channels (E/W x N/S x CH0/CH1) */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
2134 
/*
 * Enable the HBM scrambler in all NIF/SIF routers and DMA_IF down channels.
 *
 * Skipped when FW security is enabled, when the FW reports it already enabled
 * the scrambler (CPU_BOOT_DEV_STS0_DRAM_SCR_EN), or when this function has
 * already run (HW_CAP_HBM_SCRAMBLER).
 */
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	/* North interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* South interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* DMA interface down channels (E/W x N/S x CH0/CH1) */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
2202 
/*
 * Program the end-to-end (E2E) credit sizes for every SIF/NIF router and
 * DMA_IF down channel, then enable E2E credits on all of them.
 *
 * Skipped when FW security is enabled or when the FW reports it already
 * configured the credits (CPU_BOOT_DEV_STS0_E2E_CRED_EN). The per-router
 * size values are fixed tuning constants; HBM sizes are written in units
 * of 8 (hence the ">> 3").
 */
static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	/* South interface routers: per-router HBM/PCI read & write sizes */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	/* North interface routers: per-router HBM/PCI read & write sizes */
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	/* DMA_IF down channels: identical sizes for all eight channels */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	/* Enable E2E credits on all SIF routers (sizes programmed above) */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	/* Enable E2E credits on all NIF routers */
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	/* Enable E2E credits on all DMA_IF down channels */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
2452 
/*
 * Program HBM read/write credit counts for all four DMA interfaces and
 * enable read+write credits on both HBM channels of each interface.
 *
 * Skipped when FW security is enabled or when the FW reports it already
 * configured the credits (CPU_BOOT_DEV_STS0_HBM_CRED_EN).
 */
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	/* Fixed credit-count patterns, distinct per HBM channel/direction */
	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	/* Enable read + write credits on HBM channel 0 of each interface */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	/* Enable read + write credits on HBM channel 1 of each interface */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
2515 
gaudi_init_golden_registers(struct hl_device * hdev)2516 static void gaudi_init_golden_registers(struct hl_device *hdev)
2517 {
2518 	u32 tpc_offset;
2519 	int tpc_id, i;
2520 
2521 	gaudi_init_e2e(hdev);
2522 	gaudi_init_hbm_cred(hdev);
2523 
2524 	for (tpc_id = 0, tpc_offset = 0;
2525 				tpc_id < TPC_NUMBER_OF_ENGINES;
2526 				tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2527 		/* Mask all arithmetic interrupts from TPC */
2528 		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2529 		/* Set 16 cache lines */
2530 		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2531 				ICACHE_FETCH_LINE_NUM, 2);
2532 	}
2533 
2534 	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2535 	for (i = 0 ; i < 128 ; i += 8)
2536 		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2537 
2538 	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2539 	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541 	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 }
2543 
/*
 * gaudi_init_pci_dma_qman() - Initialize one stream of a PCI DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: physical DMA channel index (selects the QMAN register bank).
 * @qman_id: stream index within the QMAN.
 * @qman_pq_addr: DMA address of the stream's PQ (queue ring) in host memory.
 *
 * Programs the PQ base/size/PI/CI, the LDMA offsets and the four
 * CP_MSG_BASE addresses (E_N and W_S sync manager monitors and SOBs).
 * For stream 0 only, also configures the per-QMAN RAZWI/error IRQ
 * routing, the arbiter error message, the watchdog timeout and the
 * protection bits.
 */
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
					int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* Split sync-manager monitor/SOB addresses into 32-bit halves for
	 * the CP_MSG_BASE registers below ("en" = east-north, "ws" =
	 * west-south sync manager instance).
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN bank */
	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	/* PQ size register takes log2 of the number of entries */
	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		/* IRQ target depends on whether the driver or the F/W
		 * handles GIC interrupts (dynamic F/W regs otherwise).
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the source engine by event id */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
2634 
/*
 * gaudi_init_dma_core() - Initialize and enable a DMA core engine.
 * @hdev: habanalabs device structure.
 * @dma_id: physical DMA channel index (selects the core register bank).
 *
 * Configures outstanding-read limits, the LBW workaround for H/W bug
 * H3-2116, RAZWI error reporting/IRQ routing, secure properties (MMU
 * bypass) and finally enables the core.
 */
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	/* IRQ target depends on whether the driver handles GIC interrupts */
	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	/* Error payload identifies the source engine by event id */
	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
2674 
/*
 * gaudi_enable_qman() - Write the enable mask of a DMA channel's QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: physical DMA channel index (selects the QMAN register bank).
 * @enable_mask: value written to the QMAN's GLBL_CFG0 register.
 */
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_id * DMA_QMAN_OFFSET, enable_mask);
}
2682 
/*
 * gaudi_init_pci_dma_qmans() - Initialize all PCI DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * For each PCI DMA channel, initializes its four QMAN streams (with the
 * corresponding kernel queue ring addresses), its DMA core, and enables
 * the QMAN. Idempotent via the HW_CAP_PCI_DMA capability bit.
 */
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		/* Map logical PCI DMA channel to its physical engine id */
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q need to add 1 to get the correct
		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
		 * order to get the correct MSI register.
		 */
		if (dma_id > 1) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/* 4 kernel queues per DMA channel, one per stream */
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
2723 
/*
 * gaudi_init_hbm_dma_qman() - Initialize one stream of an HBM DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: physical DMA channel index (selects the QMAN register bank).
 * @qman_id: stream index; 0-3 are the upper CPs (internal PQs),
 *           4 is the lower CP (caller passes qman_base_addr == 0).
 * @qman_base_addr: DMA address of the stream's internal PQ, for streams 0-3.
 *
 * Streams 0-3 get a PQ base/size and CPDMA offsets; stream 4 (lower CP)
 * instead gets LDMA offsets plus the once-per-QMAN error/RAZWI IRQ
 * routing, arbiter error message, watchdog timeout and protection bits.
 * All streams get the sync-manager CP_MSG_BASE 0/1 addresses; DMA5 also
 * gets CP_MSG_BASE 2/3 for sync stream collective.
 */
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* 32-bit halves of the sync-manager monitor/SOB addresses
	 * ("en" = east-north, "ws" = west-south instance).
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN bank */
	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP: program the internal PQ ring */
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: LDMA offsets + once-per-QMAN error config */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the source engine by event id */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	/* NOTE(review): dma_id is itself taken from gaudi_dma_assignment[]
	 * by the callers; it is used here to index the table again — verify
	 * this composed mapping indeed selects engine DMA5.
	 */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
2828 
/*
 * gaudi_init_hbm_dma_qmans() - Initialize all HBM DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * For each HBM DMA channel, programs the four upper-CP streams with their
 * internal PQ addresses, the lower CP (stream 4), the DMA core, and then
 * enables the QMAN. Idempotent via the HW_CAP_HBM_DMA capability bit.
 */
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		/* Map logical HBM DMA channel to its physical engine id */
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			 /*
			  * Add the CPU queue in order to get the correct queue
			  * number as all internal queue are placed after it
			  */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
2865 
/*
 * gaudi_init_mme_qman() - Initialize one stream of an MME QMAN.
 * @hdev: habanalabs device structure.
 * @mme_offset: register offset of the target MME QMAN bank
 *              (relative to mmMME0_QM).
 * @qman_id: stream index; 0-3 are the upper CPs (internal PQs),
 *           4 is the lower CP (caller passes qman_base_addr == 0).
 * @qman_base_addr: DMA address of the stream's internal PQ, for streams 0-3.
 *
 * Streams 0-3 get a PQ base/size and CPDMA offsets; stream 4 (lower CP)
 * instead gets LDMA offsets plus the once-per-QMAN error/RAZWI IRQ
 * routing, arbiter error message, watchdog timeout and protection bits.
 * All streams get the E_N sync-manager CP_MSG_BASE 0/1 addresses.
 */
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
					int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	/* 32-bit halves of the E_N sync-manager monitor/SOB addresses */
	mtr_base_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN bank */
	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP: program the internal PQ ring */
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: LDMA offsets + once-per-QMAN error config */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		/* Derive the MME index (0 or 2 -> id) from the bank offset;
		 * only half the QMANs are used, hence the extra divide by 2
		 */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the source engine by event id */
		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}
2952 
/*
 * gaudi_init_mme_qmans() - Initialize both used MME QMANs.
 * @hdev: habanalabs device structure.
 *
 * Programs the four streams of MME2 QMAN and then of MME0 QMAN with their
 * internal PQ addresses, initializes their lower CPs, and enables both
 * QMANs. Idempotent via the HW_CAP_MME capability bit.
 */
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		/* First 4 streams go to MME2, the rest to MME0 (offset 0) */
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
2991 
/*
 * gaudi_init_tpc_qman() - Initialize one stream of a TPC QMAN.
 * @hdev: habanalabs device structure.
 * @tpc_offset: register offset of the target TPC QMAN bank
 *              (relative to mmTPC0_QM).
 * @qman_id: stream index; 0-3 are the upper CPs (internal PQs),
 *           4 is the lower CP (caller passes qman_base_addr == 0).
 * @qman_base_addr: DMA address of the stream's internal PQ, for streams 0-3.
 *
 * Streams 0-3 get a PQ base/size and CPDMA offsets; stream 4 (lower CP)
 * instead gets LDMA offsets plus the once-per-QMAN error/RAZWI IRQ
 * routing, arbiter error message, watchdog timeout and protection bits.
 * All streams get the E_N sync-manager CP_MSG_BASE 0/1 addresses; TPC7
 * (tpc_id == 6) also gets CP_MSG_BASE 2/3 for sync stream collective.
 */
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	/* 32-bit halves of the sync-manager monitor/SOB addresses
	 * ("en" = east-north, "ws" = west-south instance).
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN bank */
	q_off = tpc_offset + qman_id * 4;

	/* Recover the TPC index from the register bank offset */
	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		/* Upper CP: program the internal PQ ring */
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: LDMA offsets + once-per-QMAN error config */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the source engine by event id */
		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
3097 
/*
 * gaudi_init_tpc_qmans() - Initialize the QMANs of all TPC engines.
 * @hdev: habanalabs device structure.
 *
 * Per TPC: programs the four upper-CP streams with their internal PQ
 * addresses, the lower CP after the last stream, enables the QMAN/TPC
 * channel and sets the sync-manager base-address-high register. Sets a
 * per-TPC bit in hw_cap_initialized; returns early if all are set.
 */
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		/* Advance to the next TPC's QMAN register bank */
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
3142 
/*
 * gaudi_init_nic_qman() - Initialize one stream of a NIC QMAN.
 * @hdev: habanalabs device structure.
 * @nic_offset: register offset of the target NIC QMAN bank
 *              (relative to mmNIC0_QM0).
 * @qman_id: stream index within the QMAN.
 * @qman_base_addr: DMA address of the stream's internal PQ.
 * @nic_id: NIC engine index (used in the error IRQ payload).
 *
 * Programs the PQ base/size/PI/CI, the LDMA offsets and all four
 * CP_MSG_BASE addresses (including 2/3 for sync stream collective).
 * For stream 0 only, also configures the RAZWI/error IRQ routing,
 * arbiter error message, watchdog timeout and protection bits.
 */
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	/* 32-bit halves of the sync-manager monitor/SOB addresses; the
	 * low halves truncate CFG_BASE to 32 bits before the addition
	 */
	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN bank */
	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload identifies the source engine by event id */
		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}
3230 
/*
 * gaudi_init_nic_qmans() - Initialize the QMANs of all enabled NIC ports.
 * @hdev: habanalabs device structure.
 *
 * Iterates all NIC engines; ports cleared in hdev->nic_ports_mask are
 * skipped (but the register offset is still advanced). The two QMANs of
 * each NIC macro are adjacent banks, hence the odd-index offset
 * correction. Sets a per-NIC bit in hw_cap_initialized.
 */
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			/* Port disabled - still advance the register offset */
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		/* Odd ids wrap to the next NIC macro's first QMAN bank */
		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
3282 
gaudi_disable_pci_dma_qmans(struct hl_device * hdev)3283 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3284 {
3285 	struct gaudi_device *gaudi = hdev->asic_specific;
3286 
3287 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3288 		return;
3289 
3290 	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3291 	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3292 	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3293 }
3294 
gaudi_disable_hbm_dma_qmans(struct hl_device * hdev)3295 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3296 {
3297 	struct gaudi_device *gaudi = hdev->asic_specific;
3298 
3299 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3300 		return;
3301 
3302 	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3303 	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3304 	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3305 	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3306 	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3307 }
3308 
gaudi_disable_mme_qmans(struct hl_device * hdev)3309 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3310 {
3311 	struct gaudi_device *gaudi = hdev->asic_specific;
3312 
3313 	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3314 		return;
3315 
3316 	WREG32(mmMME2_QM_GLBL_CFG0, 0);
3317 	WREG32(mmMME0_QM_GLBL_CFG0, 0);
3318 }
3319 
gaudi_disable_tpc_qmans(struct hl_device * hdev)3320 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3321 {
3322 	struct gaudi_device *gaudi = hdev->asic_specific;
3323 	u32 tpc_offset = 0;
3324 	int tpc_id;
3325 
3326 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3327 		return;
3328 
3329 	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3330 		WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3331 		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3332 	}
3333 }
3334 
gaudi_disable_nic_qmans(struct hl_device * hdev)3335 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3336 {
3337 	struct gaudi_device *gaudi = hdev->asic_specific;
3338 	u32 nic_mask, nic_offset = 0;
3339 	u32 nic_delta_between_qmans =
3340 			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3341 	u32 nic_delta_between_nics =
3342 			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3343 	int nic_id;
3344 
3345 	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3346 		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3347 
3348 		if (gaudi->hw_cap_initialized & nic_mask)
3349 			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3350 
3351 		nic_offset += nic_delta_between_qmans;
3352 		if (nic_id & 1) {
3353 			nic_offset -= (nic_delta_between_qmans * 2);
3354 			nic_offset += nic_delta_between_nics;
3355 		}
3356 	}
3357 }
3358 
gaudi_stop_pci_dma_qmans(struct hl_device * hdev)3359 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3360 {
3361 	struct gaudi_device *gaudi = hdev->asic_specific;
3362 
3363 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3364 		return;
3365 
3366 	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3367 	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3368 	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369 	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370 }
3371 
gaudi_stop_hbm_dma_qmans(struct hl_device * hdev)3372 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3373 {
3374 	struct gaudi_device *gaudi = hdev->asic_specific;
3375 
3376 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3377 		return;
3378 
3379 	/* Stop CPs of HBM DMA QMANs */
3380 
3381 	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3382 	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383 	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384 	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 }
3387 
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs - 0x1F stops all CPs, not just the upper ones */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
3399 
gaudi_stop_tpc_qmans(struct hl_device * hdev)3400 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3401 {
3402 	struct gaudi_device *gaudi = hdev->asic_specific;
3403 
3404 	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3405 		return;
3406 
3407 	WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3408 	WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 	WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 	WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 	WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 	WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 	WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 	WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 }
3416 
gaudi_stop_nic_qmans(struct hl_device * hdev)3417 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3418 {
3419 	struct gaudi_device *gaudi = hdev->asic_specific;
3420 
3421 	/* Stop upper CPs of QMANs */
3422 
3423 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3424 		WREG32(mmNIC0_QM0_GLBL_CFG1,
3425 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3426 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3427 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3428 
3429 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3430 		WREG32(mmNIC0_QM1_GLBL_CFG1,
3431 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3432 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3433 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3434 
3435 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3436 		WREG32(mmNIC1_QM0_GLBL_CFG1,
3437 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3438 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3439 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3440 
3441 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3442 		WREG32(mmNIC1_QM1_GLBL_CFG1,
3443 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3444 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3445 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3446 
3447 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3448 		WREG32(mmNIC2_QM0_GLBL_CFG1,
3449 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3450 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3451 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3452 
3453 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3454 		WREG32(mmNIC2_QM1_GLBL_CFG1,
3455 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3456 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3457 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3458 
3459 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3460 		WREG32(mmNIC3_QM0_GLBL_CFG1,
3461 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3462 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3463 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3464 
3465 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3466 		WREG32(mmNIC3_QM1_GLBL_CFG1,
3467 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3468 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3469 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3470 
3471 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3472 		WREG32(mmNIC4_QM0_GLBL_CFG1,
3473 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3474 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3475 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3476 
3477 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3478 		WREG32(mmNIC4_QM1_GLBL_CFG1,
3479 				NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3480 				NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3481 				NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3482 }
3483 
gaudi_pci_dma_stall(struct hl_device * hdev)3484 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3485 {
3486 	struct gaudi_device *gaudi = hdev->asic_specific;
3487 
3488 	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3489 		return;
3490 
3491 	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3492 	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493 	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494 }
3495 
gaudi_hbm_dma_stall(struct hl_device * hdev)3496 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3497 {
3498 	struct gaudi_device *gaudi = hdev->asic_specific;
3499 
3500 	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3501 		return;
3502 
3503 	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3504 	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505 	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506 	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 }
3509 
/* Stall the accumulator (ACC) and SB (SBAB) units of all four MME engines */
static void gaudi_mme_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* WA for H3-1800 bug: do ACC and SBAB writes twice */
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
	WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
}
3535 
/* Assert the STALL bit of every TPC core */
static void gaudi_tpc_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
		return;

	WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
	WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
}
3552 
gaudi_disable_clock_gating(struct hl_device * hdev)3553 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3554 {
3555 	u32 qman_offset;
3556 	int i;
3557 
3558 	if (hdev->asic_prop.fw_security_enabled)
3559 		return;
3560 
3561 	for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3562 		WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3563 		WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3564 
3565 		qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3566 	}
3567 
3568 	WREG32(mmMME0_QM_CGM_CFG, 0);
3569 	WREG32(mmMME0_QM_CGM_CFG1, 0);
3570 	WREG32(mmMME2_QM_CGM_CFG, 0);
3571 	WREG32(mmMME2_QM_CGM_CFG1, 0);
3572 
3573 	for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3574 		WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3575 		WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3576 
3577 		qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3578 	}
3579 }
3580 
gaudi_enable_timestamp(struct hl_device * hdev)3581 static void gaudi_enable_timestamp(struct hl_device *hdev)
3582 {
3583 	/* Disable the timestamp counter */
3584 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3585 
3586 	/* Zero the lower/upper parts of the 64-bit counter */
3587 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3588 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3589 
3590 	/* Enable the counter */
3591 	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3592 }
3593 
/* Counterpart of gaudi_enable_timestamp() - stop the PSOC timestamp counter */
static void gaudi_disable_timestamp(struct hl_device *hdev)
{
	/* Disable the timestamp counter */
	WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
3599 
/*
 * gaudi_halt_engines() - quiesce all engines ahead of a device reset.
 * @hdev: habanalabs device structure.
 * @hard_reset: unused here; part of the common ASIC callback signature.
 * @fw_reset: true if the firmware performs the reset - engines are then
 *            left alone and only the MSI is disabled.
 *
 * Three ordered phases, each followed by a settle delay: first stop the
 * QMAN command parsers (NIC, MME, TPC, HBM DMA, PCI DMA), then stall the
 * engine cores themselves, and finally disable the QMANs altogether.
 */
static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	u32 wait_timeout_ms;

	/* Simulation platform needs a much longer settle time */
	if (hdev->pldm)
		wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	else
		wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;

	if (fw_reset)
		goto skip_engines;

	/* Phase 1: stop the QMANs from fetching/parsing new commands */
	gaudi_stop_nic_qmans(hdev);
	gaudi_stop_mme_qmans(hdev);
	gaudi_stop_tpc_qmans(hdev);
	gaudi_stop_hbm_dma_qmans(hdev);
	gaudi_stop_pci_dma_qmans(hdev);

	msleep(wait_timeout_ms);

	/* Phase 2: stall the engine cores */
	gaudi_pci_dma_stall(hdev);
	gaudi_hbm_dma_stall(hdev);
	gaudi_tpc_stall(hdev);
	gaudi_mme_stall(hdev);

	msleep(wait_timeout_ms);

	/* Phase 3: fully disable the QMANs */
	gaudi_disable_nic_qmans(hdev);
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_tpc_qmans(hdev);
	gaudi_disable_hbm_dma_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	gaudi_disable_timestamp(hdev);

skip_engines:
	gaudi_disable_msi(hdev);
}
3638 
/*
 * gaudi_mmu_init() - one-time device MMU bring-up.
 * @hdev: habanalabs device structure.
 *
 * Programs a hop-0 page-table address for every ASID, configures the MMU
 * cache management page, invalidates the MMU cache and enables the MMU.
 * Idempotent via HW_CAP_MMU, which is cleared only on hard reset.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 hop0_addr;
	int rc, i;

	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
		return 0;

	/* Each ASID owns a hop-0 table inside the page-tables pool */
	for (i = 0 ; i < prop->max_asid ; i++) {
		hop0_addr = prop->mmu_pgt_addr +
				(i * prop->dmmu.hop_table_size);

		rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
		if (rc) {
			dev_err(hdev->dev,
				"failed to set hop0 addr for asid %d\n", i);
			return rc;
		}
	}

	/* init MMU cache manage page */
	WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
	WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);

	/* mem cache invalidation */
	WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);

	rc = hl_mmu_invalidate_cache(hdev, true, 0);
	if (rc)
		return rc;

	WREG32(mmMMU_UP_MMU_ENABLE, 1);
	WREG32(mmMMU_UP_SPI_MASK, 0xF);

	/* NOTE(review): 0x30440 encodes the STLB hop configuration; the
	 * individual fields are not visible here - see the STLB register spec.
	 */
	WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);

	/*
	 * The H/W expects the first PI after init to be 1. After wraparound
	 * we'll write 0.
	 */
	gaudi->mmu_cache_inv_pi = 1;

	gaudi->hw_cap_initialized |= HW_CAP_MMU;

	return 0;
}
3687 
gaudi_load_firmware_to_device(struct hl_device * hdev)3688 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3689 {
3690 	void __iomem *dst;
3691 
3692 	dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3693 
3694 	return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3695 }
3696 
gaudi_load_boot_fit_to_device(struct hl_device * hdev)3697 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3698 {
3699 	void __iomem *dst;
3700 
3701 	dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3702 
3703 	return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3704 }
3705 
gaudi_init_dynamic_firmware_loader(struct hl_device * hdev)3706 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3707 {
3708 	struct dynamic_fw_load_mgr *dynamic_loader;
3709 	struct cpu_dyn_regs *dyn_regs;
3710 
3711 	dynamic_loader = &hdev->fw_loader.dynamic_loader;
3712 
3713 	/*
3714 	 * here we update initial values for few specific dynamic regs (as
3715 	 * before reading the first descriptor from FW those value has to be
3716 	 * hard-coded) in later stages of the protocol those values will be
3717 	 * updated automatically by reading the FW descriptor so data there
3718 	 * will always be up-to-date
3719 	 */
3720 	dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3721 	dyn_regs->kmd_msg_to_cpu =
3722 				cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3723 	dyn_regs->cpu_cmd_status_to_host =
3724 				cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3725 
3726 	dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3727 }
3728 
gaudi_init_static_firmware_loader(struct hl_device * hdev)3729 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3730 {
3731 	struct static_fw_load_mgr *static_loader;
3732 
3733 	static_loader = &hdev->fw_loader.static_loader;
3734 
3735 	static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3736 	static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 	static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3738 	static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3739 	static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3740 	static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3741 	static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3742 	static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3743 	static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3744 	static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3745 	static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3746 	static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3747 	static_loader->cpu_reset_wait_msec = hdev->pldm ?
3748 			GAUDI_PLDM_RESET_WAIT_MSEC :
3749 			GAUDI_CPU_RESET_WAIT_MSEC;
3750 }
3751 
gaudi_init_firmware_preload_params(struct hl_device * hdev)3752 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3753 {
3754 	struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3755 
3756 	pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3757 	pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3758 	pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3759 	pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3760 	pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3761 	pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3762 }
3763 
gaudi_init_firmware_loader(struct hl_device * hdev)3764 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3765 {
3766 	struct asic_fixed_properties *prop = &hdev->asic_prop;
3767 	struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3768 
3769 	/* fill common fields */
3770 	fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3771 	fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3772 	fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3773 	fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3774 	fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3775 	fw_loader->skip_bmc = !hdev->bmc_enable;
3776 	fw_loader->sram_bar_id = SRAM_BAR_ID;
3777 	fw_loader->dram_bar_id = HBM_BAR_ID;
3778 
3779 	if (prop->dynamic_fw_load)
3780 		gaudi_init_dynamic_firmware_loader(hdev);
3781 	else
3782 		gaudi_init_static_firmware_loader(hdev);
3783 }
3784 
/*
 * gaudi_init_cpu() - bring up the embedded device CPU.
 * @hdev: habanalabs device structure.
 *
 * Return: 0 on success or when CPU init is skipped/already done,
 *         negative errno on failure.
 */
static int gaudi_init_cpu(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	/* CPU f/w loading may be disabled entirely (e.g. bring-up/debug) */
	if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU)
		return 0;

	/*
	 * The device CPU works with 40 bits addresses.
	 * This register sets the extension to 50 bits.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);

	rc = hl_fw_init_cpu(hdev);

	if (rc)
		return rc;

	gaudi->hw_cap_initialized |= HW_CAP_CPU;

	return 0;
}
3812 
/*
 * gaudi_init_cpu_queues() - set up the queues used to talk to the device CPU.
 * @hdev: habanalabs device structure.
 * @cpu_timeout: how long (usec) to wait for the CPU to report readiness.
 *
 * Publishes the PQ/EQ/CQ host addresses and sizes to the device CPU,
 * kicks it via the PI-update interrupt and polls until the CPU reports
 * it is ready for the host.
 *
 * Return: 0 on success (or when CPU queues are disabled/already up),
 *         -EIO on handshake timeout.
 */
static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, irq_handler_offset;
	struct hl_eq *eq;
	struct hl_hw_queue *cpu_pq =
			&hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
	int err;

	if (!hdev->cpu_queues_enable)
		return 0;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return 0;

	eq = &hdev->event_queue;

	/* Tell the CPU where the PQ, EQ and CQ live in host memory */
	WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
	WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));

	WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
	WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));

	WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
			lower_32_bits(hdev->cpu_accessible_dma_address));
	WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
			upper_32_bits(hdev->cpu_accessible_dma_address));

	WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
	WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);

	/* Used for EQ CI */
	WREG32(mmCPU_IF_EQ_RD_OFFS, 0);

	WREG32(mmCPU_IF_PF_PQ_PI, 0);

	WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);

	/*
	 * Interrupt the CPU either through the static GIC register or, when
	 * GIC interrupts are not enabled, through the register address the
	 * FW advertised in its descriptor.
	 */
	irq_handler_offset = prop->gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);

	/* Poll every 1000 usec until the CPU flips QUEUE_INIT to READY */
	err = hl_poll_timeout(
		hdev,
		mmCPU_IF_QUEUE_INIT,
		status,
		(status == PQ_INIT_STATUS_READY_FOR_HOST),
		1000,
		cpu_timeout);

	if (err) {
		dev_err(hdev->dev,
			"Failed to communicate with Device CPU (CPU-CP timeout)\n");
		return -EIO;
	}

	/* update FW application security bits */
	if (prop->fw_cpu_boot_dev_sts0_valid)
		prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
	if (prop->fw_cpu_boot_dev_sts1_valid)
		prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);

	gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
	return 0;
}
3885 
/*
 * gaudi_pre_hw_init() - preparations that must run before any H/W init step.
 * @hdev: habanalabs device structure.
 */
static void gaudi_pre_hw_init(struct hl_device *hdev)
{
	/* Perform read from the device to make sure device is up */
	RREG32(mmHW_STATE);

	if (!hdev->asic_prop.fw_security_enabled) {
		/* Set the access through PCI bars (Linux driver only) as
		 * secured
		 */
		WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
				(PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
				PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));

		/* Perform read to flush the waiting writes to ensure
		 * configuration was set in the device
		 */
		RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
	}

	/*
	 * Let's mark in the H/W that we have reached this point. We check
	 * this value in the reset_before_init function to understand whether
	 * we need to reset the chip before doing H/W init. This register is
	 * cleared by the H/W upon H/W reset
	 */
	WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
}
3913 
/*
 * gaudi_hw_init() - full device H/W initialization.
 * @hdev: habanalabs device structure.
 *
 * Order matters throughout: HBM BAR mapping before CPU boot, clock gating
 * off before MME/TPC registers are touched, SRAM scrambler after the CPU
 * runs from HBM, MMU before security and the QMANs, and MSI before the
 * CPU queues and NIC.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_hw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	gaudi_pre_hw_init(hdev);

	/* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
	 * So we set it here and if anyone tries to move it later to
	 * a different address, there will be an error
	 */
	if (hdev->asic_prop.iatu_done_by_fw)
		gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
		return rc;
	}

	/* In case the clock gating was enabled in preboot we need to disable
	 * it here before touching the MME/TPC registers.
	 */
	gaudi_disable_clock_gating(hdev);

	/* SRAM scrambler must be initialized after CPU is running from HBM */
	gaudi_init_scrambler_sram(hdev);

	/* This is here just in case we are working without CPU */
	gaudi_init_scrambler_hbm(hdev);

	gaudi_init_golden_registers(hdev);

	rc = gaudi_mmu_init(hdev);
	if (rc)
		return rc;

	gaudi_init_security(hdev);

	gaudi_init_pci_dma_qmans(hdev);

	gaudi_init_hbm_dma_qmans(hdev);

	gaudi_init_mme_qmans(hdev);

	gaudi_init_tpc_qmans(hdev);

	gaudi_init_nic_qmans(hdev);

	gaudi_enable_timestamp(hdev);

	/* MSI must be enabled before CPU queues and NIC are initialized */
	rc = gaudi_enable_msi(hdev);
	if (rc)
		goto disable_queues;

	/* must be called after MSI was enabled */
	rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
			rc);
		goto disable_msi;
	}

	/* Perform read from the device to flush all configuration */
	RREG32(mmHW_STATE);

	return 0;

disable_msi:
	gaudi_disable_msi(hdev);
disable_queues:
	gaudi_disable_mme_qmans(hdev);
	gaudi_disable_pci_dma_qmans(hdev);

	return rc;
}
4001 
/*
 * gaudi_hw_fini() - hard-reset the device.
 * @hdev: habanalabs device structure.
 * @hard_reset: must be true - Gaudi has no soft reset.
 * @fw_reset: true when the firmware itself performs the reset; the driver
 *            then only waits for it to complete.
 *
 * Depending on the security/FW configuration, either ask the FW to reset
 * the chip or program the reset units and trigger SW_ALL_RST directly.
 * On exit, all per-capability init flags are cleared so a subsequent
 * gaudi_hw_init() re-initializes everything.
 *
 * Return: 0 on success, -ETIMEDOUT if the device did not come out of reset.
 */
static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool driver_performs_reset;

	if (!hard_reset) {
		dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
		return 0;
	}

	/* Simulation platform needs much longer timeouts */
	if (hdev->pldm) {
		reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
	} else {
		reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
		cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
	}

	if (fw_reset) {
		dev_dbg(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);

		goto skip_reset;
	}

	/* The driver resets only when the FW neither owns security nor
	 * performs the hard reset itself
	 */
	driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
					!hdev->asic_prop.hard_reset_done_by_fw);

	/* Set device to handle FLR by H/W as we will put the device CPU to
	 * halt mode
	 */
	if (driver_performs_reset)
		WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
					PCIE_AUX_FLR_CTRL_INT_MASK_MASK));

	/* If linux is loaded in the device CPU we need to communicate with it
	 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
	 * registers in case of old F/Ws
	 */
	if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_halt_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);

		/* This is a hail-mary attempt to revive the card in the small chance that the
		 * f/w has experienced a watchdog event, which caused it to return back to preboot.
		 * In that case, triggering reset through GIC won't help. We need to trigger the
		 * reset as if Linux wasn't loaded.
		 *
		 * We do it only if the reset cause was HB, because that would be the indication
		 * of such an event.
		 *
		 * In case watchdog hasn't expired but we still got HB, then this won't do any
		 * damage.
		 */
		if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
			if (hdev->asic_prop.hard_reset_done_by_fw)
				hl_fw_ask_hard_reset_without_linux(hdev);
			else
				hl_fw_ask_halt_machine_without_linux(hdev);
		}
	} else {
		if (hdev->asic_prop.hard_reset_done_by_fw)
			hl_fw_ask_hard_reset_without_linux(hdev);
		else
			hl_fw_ask_halt_machine_without_linux(hdev);
	}

	if (driver_performs_reset) {

		/* Configure the reset registers. Must be done as early as
		 * possible in case we fail during H/W initialization
		 */
		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
						(CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_TPC_7_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
						(CFG_RST_H_HBM_MASK |
						CFG_RST_H_TPC_7_MASK |
						CFG_RST_H_NIC_MASK |
						CFG_RST_H_SM_MASK |
						CFG_RST_H_DMA_MASK |
						CFG_RST_H_MME_MASK |
						CFG_RST_H_CPU_MASK |
						CFG_RST_H_MMU_MASK));

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
						(CFG_RST_L_IF_MASK |
						CFG_RST_L_PSOC_MASK |
						CFG_RST_L_TPC_MASK));

		/* Give the device CPU time to halt before asserting reset */
		msleep(cpu_timeout_ms);

		/* Tell ASIC not to re-initialize PCIe */
		WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);

		/* Restart BTL/BLR upon hard-reset */
		WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);

		WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
			1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);

		dev_dbg(hdev->dev,
			"Issued HARD reset command, going to wait %dms\n",
			reset_timeout_ms);
	} else {
		dev_dbg(hdev->dev,
			"Firmware performs HARD reset, going to wait %dms\n",
			reset_timeout_ms);
	}

skip_reset:
	/*
	 * After hard reset, we can't poll the BTM_FSM register because the PSOC
	 * itself is in reset. Need to wait until the reset is deasserted
	 */
	msleep(reset_timeout_ms);

	status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
	if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
		dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
		return -ETIMEDOUT;
	}

	/* Everything except the PCIe link must be re-initialized now */
	if (gaudi) {
		gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
						HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
						HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
						HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
						HW_CAP_HBM_SCRAMBLER);

		memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));

		hdev->device_cpu_is_halted = false;
	}
	return 0;
}
4151 
/* Before suspend, tell the device CPU to stop accessing the host over PCI */
static int gaudi_suspend(struct hl_device *hdev)
{
	return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
}
4156 
/* On resume, the PCIe iATU windows must be re-programmed */
static int gaudi_resume(struct hl_device *hdev)
{
	return gaudi_init_iatu(hdev);
}
4161 
/*
 * gaudi_mmap() - map a coherent DMA buffer into a user's address space.
 * @hdev: habanalabs device structure.
 * @vma: the vma to populate.
 * @cpu_addr: kernel virtual address of the buffer.
 * @dma_addr: device-visible DMA address of the buffer.
 * @size: mapping size in bytes.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
	int rc;

	/* Device mappings must never be copied, expanded or dumped */
	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
			VM_DONTCOPY | VM_NORESERVE);

#ifdef _HAS_DMA_MMAP_COHERENT
	/*
	 * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
	 * so vm_insert_page() can handle it safely. Without this, the kernel
	 * may BUG_ON due to VM_PFNMAP.
	 */
	if (is_vmalloc_addr(cpu_addr))
		vm_flags_set(vma, VM_MIXEDMAP);

	/* dma_addr carries the HOST_PHYS_BASE offset; strip it back off */
	rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
				(dma_addr - HOST_PHYS_BASE), size);
	if (rc)
		dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
#else

	rc = remap_pfn_range(vma, vma->vm_start,
				virt_to_phys(cpu_addr) >> PAGE_SHIFT,
				size, vma->vm_page_prot);
	if (rc)
		dev_err(hdev->dev, "remap_pfn_range error %d", rc);

 #endif


	return rc;
}
4196 
/*
 * gaudi_ring_doorbell() - publish a new PI value to a H/W queue's doorbell.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: logical queue id (one of the GAUDI_QUEUE_ID_* values).
 * @pi: new producer index to write to the queue's PQ_PI register.
 *
 * Translates the logical queue id to the matching QMAN PQ_PI register and
 * writes @pi there. For the device-CPU queue it additionally raises the
 * PI-update interrupt so the device CPU picks up the new submission.
 */
static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
	struct gaudi_device *gaudi = hdev->asic_specific;
	bool invalid_queue = false;
	int dma_id;

	switch (hw_queue_id) {
	/* PCI DMA queues: stream index is the low two bits of the queue id */
	case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	/*
	 * HBM DMA queues: stream index is (hw_queue_id - 1) & 0x3 —
	 * presumably because another queue id is interleaved before DMA_2_0
	 * in the GAUDI_QUEUE_ID_* enum so the ids are shifted by one.
	 * NOTE(review): confirm against the enum ordering.
	 */
	case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
		dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
		q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_CPU_PQ:
		if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
			db_reg_offset = mmCPU_IF_PF_PQ_PI;
		else
			invalid_queue = true;
		break;

	/* Logical MME 0 queues are served by the MME2 QMAN block */
	case GAUDI_QUEUE_ID_MME_0_0:
		db_reg_offset = mmMME2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_0_1:
		db_reg_offset = mmMME2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_0_2:
		db_reg_offset = mmMME2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_0_3:
		db_reg_offset = mmMME2_QM_PQ_PI_3;
		break;

	/* Logical MME 1 queues are served by the MME0 QMAN block */
	case GAUDI_QUEUE_ID_MME_1_0:
		db_reg_offset = mmMME0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_MME_1_1:
		db_reg_offset = mmMME0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_MME_1_2:
		db_reg_offset = mmMME0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_MME_1_3:
		db_reg_offset = mmMME0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_0_0:
		db_reg_offset = mmTPC0_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_0_1:
		db_reg_offset = mmTPC0_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_0_2:
		db_reg_offset = mmTPC0_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_0_3:
		db_reg_offset = mmTPC0_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_1_0:
		db_reg_offset = mmTPC1_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_1_1:
		db_reg_offset = mmTPC1_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_1_2:
		db_reg_offset = mmTPC1_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_1_3:
		db_reg_offset = mmTPC1_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_2_0:
		db_reg_offset = mmTPC2_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_2_1:
		db_reg_offset = mmTPC2_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_2_2:
		db_reg_offset = mmTPC2_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_2_3:
		db_reg_offset = mmTPC2_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_3_0:
		db_reg_offset = mmTPC3_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_3_1:
		db_reg_offset = mmTPC3_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_3_2:
		db_reg_offset = mmTPC3_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_3_3:
		db_reg_offset = mmTPC3_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_4_0:
		db_reg_offset = mmTPC4_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_4_1:
		db_reg_offset = mmTPC4_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_4_2:
		db_reg_offset = mmTPC4_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_4_3:
		db_reg_offset = mmTPC4_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_5_0:
		db_reg_offset = mmTPC5_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_5_1:
		db_reg_offset = mmTPC5_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_5_2:
		db_reg_offset = mmTPC5_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_5_3:
		db_reg_offset = mmTPC5_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_6_0:
		db_reg_offset = mmTPC6_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_6_1:
		db_reg_offset = mmTPC6_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_6_2:
		db_reg_offset = mmTPC6_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_6_3:
		db_reg_offset = mmTPC6_QM_PQ_PI_3;
		break;

	case GAUDI_QUEUE_ID_TPC_7_0:
		db_reg_offset = mmTPC7_QM_PQ_PI_0;
		break;

	case GAUDI_QUEUE_ID_TPC_7_1:
		db_reg_offset = mmTPC7_QM_PQ_PI_1;
		break;

	case GAUDI_QUEUE_ID_TPC_7_2:
		db_reg_offset = mmTPC7_QM_PQ_PI_2;
		break;

	case GAUDI_QUEUE_ID_TPC_7_3:
		db_reg_offset = mmTPC7_QM_PQ_PI_3;
		break;

	/*
	 * NIC queues: two QMANs per NIC macro (NIC_2k -> mmNICk_QM0,
	 * NIC_2k+1 -> mmNICk_QM1). When the NIC port was not initialized,
	 * invalid_queue suppresses the doorbell write below; the offset
	 * computed after the capability test is then simply unused.
	 */
	case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
		break;

	case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
		if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
			invalid_queue = true;

		q_off = ((hw_queue_id - 1) & 0x3) * 4;
		db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
		break;

	default:
		invalid_queue = true;
	}

	if (invalid_queue) {
		/* Should never get here */
		dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
			hw_queue_id);
		return;
	}

	db_value = pi;

	/* ring the doorbell */
	WREG32(db_reg_offset, db_value);

	if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
		/* make sure device CPU will read latest data from host */
		mb();

		/*
		 * Use the hard-coded GIC register when GIC interrupts are
		 * enabled, otherwise the FW-provided dynamic register.
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);

		WREG32(irq_handler_offset,
			gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
	}
}
4538 
/*
 * gaudi_pqe_write() - copy a buffer descriptor into a PQ entry.
 * @hdev: habanalabs device structure (unused).
 * @pqe: destination PQ entry (two 64-bit little-endian words).
 * @bd: source buffer descriptor.
 */
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	const __le64 *src = (const __le64 *) bd;

	/* The QMANs are on the host memory so a simple copy suffice */
	pqe[0] = src[0];
	pqe[1] = src[1];
}
4548 
/*
 * gaudi_dma_alloc_coherent() - allocate a coherent DMA buffer.
 * @hdev: habanalabs device structure.
 * @size: allocation size in bytes.
 * @dma_handle: out - device-visible address of the buffer.
 * @flags: GFP allocation flags.
 *
 * Return: kernel virtual address of the buffer, or NULL on failure.
 */
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
					dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;

	vaddr = dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
	if (!vaddr)
		return NULL;

	/* Shift to the device's base physical address of host memory */
	*dma_handle += HOST_PHYS_BASE;

	return vaddr;
}
4561 
/*
 * gaudi_dma_free_coherent() - release a buffer from gaudi_dma_alloc_coherent().
 * @hdev: habanalabs device structure.
 * @size: original allocation size.
 * @cpu_addr: kernel virtual address returned by the allocator.
 * @dma_handle: device-visible address returned by the allocator.
 */
static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
		void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr,
				dma_handle - HOST_PHYS_BASE);
}
4570 
/*
 * gaudi_scrub_device_dram() - overwrite the HBM user region with @val.
 * @hdev: habanalabs device structure.
 * @val: 64-bit fill pattern.
 *
 * Splits [dram_user_base_address, dram_end_address) into chunks of up to
 * 2GB and hands one chunk to each DMA core in memset mode so all channels
 * scrub in parallel, then polls each engine until it goes idle before
 * committing the next batch.
 *
 * Return: 0 on success, -EIO if any DMA engine did not drain in time.
 */
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = prop->dram_user_base_address;
	u32 chunk_size, busy;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		/* Commit one chunk per DMA core; memset uses SRC as pattern */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
					lower_32_bits(val));
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
					upper_32_bits(val));
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
						lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
						upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
					chunk_size);
			/* Kick the engine: linear transfer + memset mode */
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			if (cur_addr == prop->dram_end_address)
				break;
		}

		/* Wait for every engine in this batch to become idle */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				busy,
				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}
4631 
/*
 * gaudi_scrub_device_mem() - scrub SRAM and HBM with the configured pattern.
 * @hdev: habanalabs device structure.
 *
 * No-op unless hdev->memory_scrub is set. Waits (bounded) for the device
 * to become idle, memsets the SRAM user region, then scrubs the HBM using
 * all DMA channels in parallel.
 *
 * Return: 0 on success, -ETIMEDOUT if the device never went idle, or the
 * error returned by the SRAM/HBM scrubbing helpers.
 */
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
	u64 addr, size, val = hdev->memory_scrub_val;
	ktime_t timeout;
	int rc = 0;

	if (!hdev->memory_scrub)
		return 0;

	/* Scrubbing must not race active engines - wait for full idle */
	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_err(hdev->dev, "waiting for idle timeout\n");
			return -ETIMEDOUT;
		}
		usleep_range((1000 >> 2) + 1, 1000);
	}

	/* Scrub SRAM; on simulation (pldm) only a 64KB slice to save time */
	addr = prop->sram_user_base_address;
	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;

	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
		return rc;
	}

	/* Scrub HBM using all DMA channels in parallel */
	rc = gaudi_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
		return rc;
	}

	return 0;
}
4673 
/*
 * gaudi_get_int_queue_base() - look up the kernel PQ of an internal queue.
 * @hdev: habanalabs device structure.
 * @queue_id: queue to look up; must be of type QUEUE_TYPE_INT.
 * @dma_handle: out - DMA address of the queue's PQ.
 * @queue_len: out - number of PQ entries.
 *
 * Return: kernel virtual address of the PQ, or NULL for an invalid id.
 */
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *qman;

	/* Only internal queues keep a kernel-managed PQ in this table */
	if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
			gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	qman = &gaudi->internal_qmans[queue_id];
	*dma_handle = qman->pq_dma_addr;
	*queue_len = qman->pq_size / QMAN_PQ_ENTRY_SIZE;

	return qman->pq_kernel_addr;
}
4693 
/*
 * gaudi_send_cpu_message() - send a message to the device CPU over its PQ.
 * @hdev: habanalabs device structure.
 * @msg: message payload.
 * @len: payload length in bytes.
 * @timeout: timeout in usec; 0 selects the default.
 * @result: optional out - response value from the device CPU.
 *
 * Return: 0 on success (including when no CPU queue exists), negative
 * errno from the common FW messaging code otherwise.
 */
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 effective_timeout;

	/* No CPU queue - nobody to talk to; report success with zero result */
	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	effective_timeout = timeout ? timeout : GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
						effective_timeout, result);
}
4711 
/*
 * gaudi_test_queue() - sanity-test one external H/W queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: external queue to exercise.
 *
 * Allocates a fence word in host memory, submits a MSG_PROT packet on the
 * queue that writes GAUDI_QMAN0_FENCE_VAL to that word, and polls the word
 * until the value appears or the (pldm-aware) timeout expires.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO on fence
 * timeout, or the submission error code.
 */
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp, timeout_usec;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	/* Simulation platform is much slower - use a longer timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;

	fence_val = GAUDI_QMAN0_FENCE_VAL;

	/* Host memory word the queue engine will write the fence value to */
	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
						&pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	/* MSG_PROT with engine-barrier and message-barrier set */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, timeout_usec, true);

	/* Consume the test submission regardless of the poll outcome */
	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
4784 
gaudi_test_cpu_queue(struct hl_device * hdev)4785 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4786 {
4787 	struct gaudi_device *gaudi = hdev->asic_specific;
4788 
4789 	/*
4790 	 * check capability here as send_cpu_message() won't update the result
4791 	 * value if no capability
4792 	 */
4793 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4794 		return 0;
4795 
4796 	return hl_fw_test_cpu_queue(hdev);
4797 }
4798 
gaudi_test_queues(struct hl_device * hdev)4799 static int gaudi_test_queues(struct hl_device *hdev)
4800 {
4801 	int i, rc, ret_val = 0;
4802 
4803 	for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4804 		if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4805 			rc = gaudi_test_queue(hdev, i);
4806 			if (rc)
4807 				ret_val = -EINVAL;
4808 		}
4809 	}
4810 
4811 	rc = gaudi_test_cpu_queue(hdev);
4812 	if (rc)
4813 		ret_val = -EINVAL;
4814 
4815 	return ret_val;
4816 }
4817 
/*
 * gaudi_dma_pool_zalloc() - zeroed allocation from the device's DMA pool.
 * @hdev: habanalabs device structure.
 * @size: requested size; must not exceed the pool's block size.
 * @mem_flags: GFP allocation flags.
 * @dma_handle: out - device-visible address of the buffer.
 *
 * Return: kernel virtual address, or NULL on oversized request or failure.
 */
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
		gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *vaddr;

	/* The pool only serves requests up to its fixed block size */
	if (size > GAUDI_DMA_POOL_BLK_SIZE)
		return NULL;

	vaddr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
	if (!vaddr)
		return NULL;

	/* Shift to the device's base physical address of host memory */
	*dma_handle += HOST_PHYS_BASE;

	return vaddr;
}
4834 
/*
 * gaudi_dma_pool_free() - return a buffer to the device's DMA pool.
 * @hdev: habanalabs device structure.
 * @vaddr: kernel virtual address returned by gaudi_dma_pool_zalloc().
 * @dma_addr: device-visible address returned by gaudi_dma_pool_zalloc().
 */
static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
			dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_pool_free(hdev->dma_pool, vaddr, dma_addr - HOST_PHYS_BASE);
}
4843 
/* Allocate from the FW CPU-accessible DMA pool (thin common-code wrapper). */
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
					size_t size, dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
4849 
/* Return a buffer to the FW CPU-accessible DMA pool (thin wrapper). */
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
						size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
4855 
/*
 * gaudi_get_dma_desc_list_size() - patched-CB bytes needed for an SG table.
 * @hdev: habanalabs device structure (not referenced in the body).
 * @sgt: DMA-mapped scatter-gather table of pinned user memory.
 *
 * Counts how many LIN_DMA packets are needed to cover @sgt, merging
 * adjacent entries that are physically contiguous as long as the combined
 * length stays within DMA_MAX_TRANSFER_SIZE, and returns that count times
 * the LIN_DMA packet size.
 */
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Greedily fold contiguous follow-on entries into this one */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
4895 
/*
 * gaudi_pin_memory_before_cs() - pin and DMA-map user memory for a DMA packet.
 * @hdev: habanalabs device structure.
 * @parser: CS parser context; its patched_cb_size is grown by the bytes the
 *          patched LIN_DMA packets covering this range will occupy.
 * @user_dma_pkt: the user's LIN_DMA packet (provides the transfer size).
 * @addr: host virtual address referenced by the packet.
 * @dir: DMA direction of the transfer.
 *
 * Reuses an existing pinning of the same range when one is already on the
 * job's userptr list; otherwise pins the range, DMA-maps it and queues it
 * on the list so it gets released after the CS completes.
 *
 * Return: 0 on success, negative errno on allocation/pin/map failure.
 */
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	/* Same range may appear in several packets - pin it only once */
	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc_obj(*userptr);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	parser->patched_cb_size +=
			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}
4941 
/*
 * gaudi_validate_dma_pkt_host() - validate a LIN_DMA packet touching host.
 * @hdev: habanalabs device structure.
 * @parser: CS parser context; patched_cb_size is updated.
 * @user_dma_pkt: the user's LIN_DMA packet.
 * @src_in_host: true when the transfer source is host memory.
 *
 * Determines the host address and DMA direction of the transfer and pins
 * the host memory, except for memset transfers which carry no host payload.
 *
 * Return: 0 on success, negative errno from the pinning path.
 */
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				bool src_in_host)
{
	bool user_memset, skip_host_mem_pin = false;
	enum dma_data_direction dir;
	u64 addr;

	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		/* A memset "source" is a pattern, not memory - nothing to pin */
		if (user_memset)
			skip_host_mem_pin = true;

		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
	} else {
		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	}

	if (skip_host_mem_pin) {
		parser->patched_cb_size += sizeof(*user_dma_pkt);
		return 0;
	}

	return gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);
}
4979 
gaudi_validate_dma_pkt_no_mmu(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_lin_dma * user_dma_pkt)4980 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4981 				struct hl_cs_parser *parser,
4982 				struct packet_lin_dma *user_dma_pkt)
4983 {
4984 	bool src_in_host = false;
4985 	u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4986 			GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4987 			GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4988 
4989 	dev_dbg(hdev->dev, "DMA packet details:\n");
4990 	dev_dbg(hdev->dev, "source == 0x%llx\n",
4991 				le64_to_cpu(user_dma_pkt->src_addr));
4992 	dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4993 	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4994 
4995 	/*
4996 	 * Special handling for DMA with size 0. Bypass all validations
4997 	 * because no transactions will be done except for WR_COMP, which
4998 	 * is not a security issue
4999 	 */
5000 	if (!le32_to_cpu(user_dma_pkt->tsize)) {
5001 		parser->patched_cb_size += sizeof(*user_dma_pkt);
5002 		return 0;
5003 	}
5004 
5005 	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5006 		src_in_host = true;
5007 
5008 	return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5009 						src_in_host);
5010 }
5011 
gaudi_validate_load_and_exe_pkt(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_load_and_exe * user_pkt)5012 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5013 					struct hl_cs_parser *parser,
5014 					struct packet_load_and_exe *user_pkt)
5015 {
5016 	u32 cfg;
5017 
5018 	cfg = le32_to_cpu(user_pkt->cfg);
5019 
5020 	if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5021 		dev_err(hdev->dev,
5022 			"User not allowed to use Load and Execute\n");
5023 		return -EPERM;
5024 	}
5025 
5026 	parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5027 
5028 	return 0;
5029 }
5030 
/*
 * gaudi_validate_cb() - first parsing pass over a user command buffer.
 * @hdev: habanalabs device structure.
 * @parser: CS parser context; patched_cb_size is computed here.
 * @is_mmu: true when the job runs with MMU translation, in which case
 *          LIN_DMA packets need no host-memory pinning.
 *
 * Walks the CB packet by packet, rejects packet types user space may not
 * submit, checks packet boundaries, and accumulates the size the patched
 * CB will need.
 *
 * Return: 0 on success, -EINVAL on malformed CB, -EPERM on forbidden
 * packets.
 */
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct gaudi_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* A packet must fit entirely inside the user CB */
		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_LOAD_AND_EXE:
			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
				(struct packet_load_and_exe *) user_pkt);
			break;

		case PACKET_LIN_DMA:
			parser->contains_dma_pkt = true;
			/* With MMU the packet passes through unpatched */
			if (is_mmu)
				parser->patched_cb_size += pkt_size;
			else
				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have extra space at its end for:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate an MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
			parser->patched_cb_size);

	return rc;
}
5137 
/*
 * gaudi_patch_dma_packet() - expand a user LIN_DMA packet into per-SG packets.
 * @hdev: habanalabs device structure.
 * @parser: CS parser context.
 * @user_dma_pkt: original packet from the user CB.
 * @new_dma_pkt: destination in the patched CB for the generated packet(s).
 * @new_dma_pkt_size: out - total bytes written to the patched CB.
 *
 * Replaces the single user packet with one LIN_DMA packet per (merged)
 * scatter-gather entry of the previously pinned host memory. The engine
 * barrier is kept only on the first generated packet and the user's
 * write-completion setting is restored only on the last one. Host-memset
 * packets are copied through unchanged.
 *
 * Return: 0 on success, -EFAULT when the memory was not pinned or the SG
 * table yielded no entries.
 */
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool src_in_host = false;
	bool skip_host_mem_pin = false;
	bool user_memset;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	/* Validation must have pinned this range already */
	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	/* Host-memset transfers pass through unmodified */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Merge contiguous entries, same rule as the size pass */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		/* Keep the engine barrier only on the first packet */
		ctl = le32_to_cpu(user_dma_pkt->ctl);
		if (likely(dma_desc_cnt))
			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32(len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - wrcomp must be as user set it */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
5256 
/**
 * gaudi_patch_cb() - copy a user CB into the patched CB, packet by packet.
 * @hdev: habanalabs device structure.
 * @parser: CS parser holding the user CB and the pre-allocated patched CB.
 *
 * Walks the user CB, validates every packet header, rejects privileged
 * packets (MSG_PROT, CP_DMA, STOP) and copies the allowed ones into the
 * patched CB. LIN_DMA packets are rewritten by gaudi_patch_dma_packet()
 * and may expand into several packets, so the source and destination
 * offsets advance independently.
 *
 * Return: 0 on success, -EINVAL on a malformed packet, -EPERM on a
 * forbidden packet.
 */
static int gaudi_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct gaudi_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		/* Extract the opcode from the 64-bit packet header */
		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		/* A packet must not extend past the end of the user CB */
		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			/* May expand to several packets; advance the patched
			 * offset by the size actually generated.
			 */
			rc = gaudi_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		/* All remaining packet types are copied verbatim */
		case PACKET_WREG_32:
		case PACKET_WREG_BULK:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
		case PACKET_LOAD_AND_EXE:
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
5346 
/**
 * gaudi_parse_cb_mmu() - validate a user CB when the device MMU is enabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser that holds the user CB.
 *
 * Allocates a kernel-owned patched CB, copies the user CB into it and runs
 * validation on the copy rather than on the user's buffer. With the MMU on,
 * no packet patching takes place, so the validated size must equal the size
 * computed up front.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size = parser->user_cb_size +
				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
	else
		parser->patched_cb_size = parser->user_cb_size;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * We are protected from overflow because the check
	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
	 *
	 * There is no option to reach here without going through that check because:
	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
	 *    an external queue.
	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* Validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = gaudi_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation must not have changed the required size */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

	return rc;
}
5431 
/**
 * gaudi_parse_cb_no_mmu() - validate and patch a user CB when MMU is disabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser that holds the user CB.
 *
 * First pass (gaudi_validate_cb) computes the required patched CB size and
 * populates parser->job_userptr_list; a patched CB of that size is then
 * allocated and the second pass (gaudi_patch_cb) copies/patches the packets
 * into it. On any failure the userptr list is released here.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	int rc;

	rc = gaudi_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	rc = gaudi_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}
5479 
gaudi_parse_cb_no_ext_queue(struct hl_device * hdev,struct hl_cs_parser * parser)5480 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5481 					struct hl_cs_parser *parser)
5482 {
5483 	struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5484 	struct gaudi_device *gaudi = hdev->asic_specific;
5485 	u32 nic_queue_offset, nic_mask_q_id;
5486 
5487 	if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5488 			(parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5489 		nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5490 		nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5491 
5492 		if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5493 			dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5494 			return -EINVAL;
5495 		}
5496 	}
5497 
5498 	/* For internal queue jobs just check if CB address is valid */
5499 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5500 					parser->user_cb_size,
5501 					asic_prop->sram_user_base_address,
5502 					asic_prop->sram_end_address))
5503 		return 0;
5504 
5505 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5506 					parser->user_cb_size,
5507 					asic_prop->dram_user_base_address,
5508 					asic_prop->dram_end_address))
5509 		return 0;
5510 
5511 	/* PMMU and HPMMU addresses are equal, check only one of them */
5512 	if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5513 					parser->user_cb_size,
5514 					asic_prop->pmmu.start_addr,
5515 					asic_prop->pmmu.end_addr))
5516 		return 0;
5517 
5518 	dev_err(hdev->dev,
5519 		"CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5520 		parser->user_cb, parser->user_cb_size);
5521 
5522 	return -EFAULT;
5523 }
5524 
gaudi_cs_parser(struct hl_device * hdev,struct hl_cs_parser * parser)5525 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5526 {
5527 	struct gaudi_device *gaudi = hdev->asic_specific;
5528 
5529 	if (parser->queue_type == QUEUE_TYPE_INT)
5530 		return gaudi_parse_cb_no_ext_queue(hdev, parser);
5531 
5532 	if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5533 		return gaudi_parse_cb_mmu(hdev, parser);
5534 	else
5535 		return gaudi_parse_cb_no_mmu(hdev, parser);
5536 }
5537 
/**
 * gaudi_add_end_of_cb_packets() - append completion packets to a CB.
 * @hdev: habanalabs device structure.
 * @kernel_address: kernel virtual address of the CB.
 * @len: total CB length, including the two trailing MSG_PROT packets.
 * @original_len: CB length before the driver-added packets.
 * @cq_addr: address written by the completion MSG_PROT packet.
 * @cq_val: value written to @cq_addr.
 * @msi_vec: interrupt vector index (used only when there is no PCI device).
 * @eb: whether to set the engine-barrier bit on the completion packet.
 */
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
				u32 msi_vec, bool eb)
{
	struct packet_msg_prot *cq_pkt;
	struct packet_nop *cq_padding;
	u64 msi_addr;
	u32 tmp;

	/* The two MSG_PROT packets sit at the very end of the CB; the gap
	 * between the original content and them is filled with NOP packets.
	 */
	cq_padding = kernel_address + original_len;
	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	while ((void *)cq_padding < (void *)cq_pkt) {
		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
		cq_padding++;
	}

	/* First MSG_PROT: write the completion value to the CQ address */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	if (eb)
		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);

	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	/* Second MSG_PROT: write 1 to the interrupt request register. With a
	 * PCI device the single MSI request register is used; otherwise the
	 * per-vector interrupt register is selected by msi_vec.
	 */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(1);
	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
5574 
/* Publish the event queue consumer index to the CPU-IF read-offset register */
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
5579 
/**
 * gaudi_memset_device_memory() - fill a device memory range with a value.
 * @hdev: habanalabs device structure.
 * @addr: device address to start filling at.
 * @size: number of bytes to fill.
 * @val: fill value (carried in the LIN_DMA source field in memset mode).
 *
 * Builds a single LIN_DMA packet with the MEMSET bit set in a kernel CB and
 * submits it on QMAN0 of DMA channel 0. Pre-existing DMA error causes are
 * cleared before the transfer (during init only) and checked after it.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl, err_cause;
	struct hl_cb *cb;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	lin_dma_pkt = cb->kernel_address;
	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
	cb_size = sizeof(*lin_dma_pkt);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	/* In memset mode the "source address" field carries the fill value */
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	/* packet was zeroed above, so OR is effectively an assignment */
	lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause && !hdev->init_done) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	/* Leave room for the completion MSG_PROT appended by the queue code */
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		rc = -EIO;
		if (!hdev->init_done) {
			dev_dbg(hdev->dev,
				"Clearing DMA0 engine from errors (cause 0x%x)\n",
				err_cause);
			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
		}
	}

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
5658 
/**
 * gaudi_memset_registers() - write a value to a consecutive register range.
 * @hdev: habanalabs device structure.
 * @reg_base: device address of the first register.
 * @num_regs: number of consecutive 32-bit registers to write.
 * @val: value to write to each register.
 *
 * Builds a kernel CB containing one MSG_LONG packet per register plus a
 * trailing MSG_PROT and submits it on QMAN0 of DMA channel 0.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	int i, rc;

	/* One MSG_LONG per register + trailing MSG_PROT completion packet */
	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		/* Report the limit in MB; SZ_2M itself is in bytes */
		dev_err(hdev->dev, "CB size must be smaller than %uMB",
				SZ_2M >> 20);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
5721 
gaudi_restore_sm_registers(struct hl_device * hdev)5722 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5723 {
5724 	u64 base_addr;
5725 	u32 num_regs;
5726 	int rc;
5727 
5728 	base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5729 	num_regs = NUM_OF_SOB_IN_BLOCK;
5730 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5731 	if (rc) {
5732 		dev_err(hdev->dev, "failed resetting SM registers");
5733 		return -ENOMEM;
5734 	}
5735 
5736 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5737 	num_regs = NUM_OF_SOB_IN_BLOCK;
5738 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5739 	if (rc) {
5740 		dev_err(hdev->dev, "failed resetting SM registers");
5741 		return -ENOMEM;
5742 	}
5743 
5744 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5745 	num_regs = NUM_OF_SOB_IN_BLOCK;
5746 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5747 	if (rc) {
5748 		dev_err(hdev->dev, "failed resetting SM registers");
5749 		return -ENOMEM;
5750 	}
5751 
5752 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5753 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5754 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5755 	if (rc) {
5756 		dev_err(hdev->dev, "failed resetting SM registers");
5757 		return -ENOMEM;
5758 	}
5759 
5760 	base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5761 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5762 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5763 	if (rc) {
5764 		dev_err(hdev->dev, "failed resetting SM registers");
5765 		return -ENOMEM;
5766 	}
5767 
5768 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5769 	num_regs = NUM_OF_MONITORS_IN_BLOCK;
5770 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5771 	if (rc) {
5772 		dev_err(hdev->dev, "failed resetting SM registers");
5773 		return -ENOMEM;
5774 	}
5775 
5776 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5777 			(GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5778 	num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5779 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5780 	if (rc) {
5781 		dev_err(hdev->dev, "failed resetting SM registers");
5782 		return -ENOMEM;
5783 	}
5784 
5785 	base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5786 			(GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5787 	num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5788 	rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5789 	if (rc) {
5790 		dev_err(hdev->dev, "failed resetting SM registers");
5791 		return -ENOMEM;
5792 	}
5793 
5794 	return 0;
5795 }
5796 
gaudi_restore_dma_registers(struct hl_device * hdev)5797 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5798 {
5799 	u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5800 			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5801 	int i;
5802 
5803 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5804 		u64 sob_addr = CFG_BASE +
5805 				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5806 				(i * sob_delta);
5807 		u32 dma_offset = i * DMA_CORE_OFFSET;
5808 
5809 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5810 				lower_32_bits(sob_addr));
5811 		WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5812 				upper_32_bits(sob_addr));
5813 		WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5814 
5815 		/* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5816 		 * modified by the user for SRAM reduction
5817 		 */
5818 		if (i > 1)
5819 			WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5820 								0x00000001);
5821 	}
5822 }
5823 
gaudi_restore_qm_registers(struct hl_device * hdev)5824 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5825 {
5826 	u32 qman_offset;
5827 	int i;
5828 
5829 	for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5830 		qman_offset = i * DMA_QMAN_OFFSET;
5831 		WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5832 	}
5833 
5834 	for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5835 		qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5836 		WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5837 	}
5838 
5839 	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5840 		qman_offset = i * TPC_QMAN_OFFSET;
5841 		WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5842 	}
5843 
5844 	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5845 		qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5846 				(i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5847 		WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5848 	}
5849 }
5850 
/**
 * gaudi_restore_user_registers() - restore all user-modifiable registers.
 * @hdev: habanalabs device structure.
 *
 * Clears the sync manager registers and then restores the DMA core and QMAN
 * arbiter registers.
 *
 * Return: 0 on success, the SM restore error code on failure.
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (rc)
		return rc;

	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);

	return 0;
}
5864 
/* ASIC hook for context switch - no per-context h/w work is done on Gaudi */
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
5869 
gaudi_mmu_clear_pgt_range(struct hl_device * hdev)5870 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5871 {
5872 	u32 size = hdev->asic_prop.mmu_pgt_size +
5873 			hdev->asic_prop.mmu_cache_mng_size;
5874 	struct gaudi_device *gaudi = hdev->asic_specific;
5875 	u64 addr = hdev->asic_prop.mmu_pgt_addr;
5876 
5877 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5878 		return 0;
5879 
5880 	return gaudi_memset_device_memory(hdev, addr, size, 0);
5881 }
5882 
/* ASIC hook for restoring phase topology - intentionally empty on Gaudi */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
5887 
/**
 * gaudi_dma_core_transfer() - run one DMA-core transfer and wait for it.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel to use.
 * @addr: device source address.
 * @size_to_dma: number of bytes to transfer.
 * @dma_addr: host DMA (bus) destination address.
 *
 * Programs the DMA core registers directly (bypassing the QMAN), commits a
 * linear transfer and polls until the engine's busy bit clears. Any reported
 * error cause is logged and cleared.
 *
 * Return: 0 on success, -EIO on timeout or a reported DMA error.
 */
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	/* Kick off a linear transfer */
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		/* Write-1-to-clear the error cause bits */
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
5934 
/**
 * gaudi_debugfs_read_dma() - read device memory into a host buffer via DMA.
 * @hdev: habanalabs device structure.
 * @addr: device address to read from.
 * @size: number of bytes to read.
 * @blob_addr: host buffer to copy the data into.
 *
 * Picks an idle PCI DMA engine (tries PCI_DMA_1, falls back to PCI_DMA_2),
 * stops its QMAN, temporarily sets the core to privileged mode and transfers
 * the data through a 2MB bounce buffer, chunk by chunk. The whole operation
 * runs under the h/w queues lock.
 *
 * Return: 0 on success, -ENOMEM if the bounce buffer allocation failed,
 * -EAGAIN if both engines are busy, or a transfer error code.
 */
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	u32 qm_glbl_sts0, qm_cgm_sts;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* First candidate engine: PCI DMA 1 */
	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
		      IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		/* Fall back to PCI DMA 2 if the first engine is busy */
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
			      IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	/* Stop the QMAN CPs so they don't race us on the core; the previous
	 * config is restored at the end.
	 */
	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	/* Transfer in chunks of up to the 2MB bounce buffer size */
	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Restore the saved QMAN configuration */
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);

	return rc;
}
6039 
/**
 * gaudi_read_pte() - read a page table entry through the HBM BAR.
 * @hdev: habanalabs device structure.
 * @addr: device address of the PTE, relative to the current BAR window.
 *
 * Return: the 64-bit PTE value, or U64_MAX when a hard reset is pending
 * (device access is unsafe then).
 */
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 bar_offset;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	bar_offset = addr - gaudi->hbm_bar_cur_addr;

	return readq(hdev->pcie_bar[HBM_BAR_ID] + bar_offset);
}
6050 
/**
 * gaudi_write_pte() - write a page table entry through the HBM BAR.
 * @hdev: habanalabs device structure.
 * @addr: device address of the PTE, relative to the current BAR window.
 * @val: 64-bit PTE value to write.
 *
 * Silently skipped when a hard reset is pending (device access is unsafe).
 */
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 bar_offset;

	if (hdev->reset_info.hard_reset_pending)
		return;

	bar_offset = addr - gaudi->hbm_bar_cur_addr;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] + bar_offset);
}
6061 
/**
 * gaudi_mmu_prepare_reg() - program an engine's user register with an ASID.
 * @hdev: habanalabs device structure.
 * @reg: register address to update.
 * @asid: address-space ID to set (expected to fit in the cleared bit range).
 *
 * Clears the MMBP (MMU bypass) and ASID bits, then ORs in the new ASID.
 */
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
6068 
gaudi_mmu_prepare(struct hl_device * hdev,u32 asid)6069 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6070 {
6071 	struct gaudi_device *gaudi = hdev->asic_specific;
6072 
6073 	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6074 		return;
6075 
6076 	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6077 		dev_crit(hdev->dev, "asid %u is too big\n", asid);
6078 		return;
6079 	}
6080 
6081 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6082 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6083 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6084 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6085 	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6086 
6087 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6088 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6089 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6090 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6091 	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6092 
6093 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6094 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6095 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6096 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6097 	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6098 
6099 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6100 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6101 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6102 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6103 	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6104 
6105 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6106 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6107 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6108 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6109 	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6110 
6111 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6112 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6113 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6114 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6115 	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6116 
6117 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6118 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6119 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6120 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6121 	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6122 
6123 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6124 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6125 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6126 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6127 	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6128 
6129 	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6130 	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6131 	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6132 	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6133 	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6134 	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6135 	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6136 	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6137 
6138 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6139 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6140 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6141 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6142 	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6143 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6144 	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6145 
6146 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6147 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6148 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6149 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6150 	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6151 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6152 	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6153 
6154 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6155 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6156 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6157 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6158 	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6159 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6160 	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6161 
6162 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6163 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6164 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6165 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6166 	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6167 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6168 	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6169 
6170 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6171 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6172 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6173 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6174 	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6175 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6176 	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6177 
6178 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6179 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6180 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6181 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6182 	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6183 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6184 	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6185 
6186 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6187 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6188 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6189 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6190 	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6191 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6192 	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6193 
6194 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6195 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6196 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6197 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6198 	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6199 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6200 	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6201 
6202 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6203 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6204 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6205 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6206 	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6207 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6208 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6209 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6210 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6211 	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6212 
6213 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6214 	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6215 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6216 	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6217 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6218 	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6219 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6220 	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6221 	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6222 	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6223 	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6224 	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6225 
6226 	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6227 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6228 				asid);
6229 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6230 				asid);
6231 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6232 				asid);
6233 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6234 				asid);
6235 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6236 				asid);
6237 	}
6238 
6239 	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6240 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6241 				asid);
6242 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6243 				asid);
6244 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6245 				asid);
6246 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6247 				asid);
6248 		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6249 				asid);
6250 	}
6251 
6252 	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6253 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6254 				asid);
6255 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6256 				asid);
6257 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6258 				asid);
6259 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6260 				asid);
6261 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6262 				asid);
6263 	}
6264 
6265 	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6266 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6267 				asid);
6268 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6269 				asid);
6270 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6271 				asid);
6272 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6273 				asid);
6274 		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6275 				asid);
6276 	}
6277 
6278 	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6279 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6280 				asid);
6281 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6282 				asid);
6283 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6284 				asid);
6285 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6286 				asid);
6287 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6288 				asid);
6289 	}
6290 
6291 	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6292 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6293 				asid);
6294 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6295 				asid);
6296 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6297 				asid);
6298 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6299 				asid);
6300 		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6301 				asid);
6302 	}
6303 
6304 	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6305 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6306 				asid);
6307 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6308 				asid);
6309 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6310 				asid);
6311 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6312 				asid);
6313 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6314 				asid);
6315 	}
6316 
6317 	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6318 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6319 				asid);
6320 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6321 				asid);
6322 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6323 				asid);
6324 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6325 				asid);
6326 		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6327 				asid);
6328 	}
6329 
6330 	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6331 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6332 				asid);
6333 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6334 				asid);
6335 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6336 				asid);
6337 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6338 				asid);
6339 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6340 				asid);
6341 	}
6342 
6343 	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6344 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6345 				asid);
6346 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6347 				asid);
6348 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6349 				asid);
6350 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6351 				asid);
6352 		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6353 				asid);
6354 	}
6355 
6356 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6357 	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6358 }
6359 
/*
 * gaudi_send_job_on_qman0 - execute a patched CB on the QMAN0 (PCI DMA) queue
 *
 * @hdev: pointer to hl device structure
 * @job: job whose patched CB should be executed
 *
 * Appends nothing itself - the caller already left room for a MSG_PROT fence
 * packet at the end of the CB. This function fills in that fence packet,
 * temporarily opens the DMA core protection so QMAN0 may perform the
 * transfer, submits the CB and busy-waits until the engine writes the fence
 * value to host memory. Protection is restored on every exit path.
 *
 * Return: 0 on success, -ENOMEM on fence allocation failure, -ETIMEDOUT if
 * the fence was not signaled in time, or the queue submission error code.
 */
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
		struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	/* Palladium (simulation) platform needs a much larger timeout */
	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	/* The fence packet occupies the last packet slot of the patched CB */
	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	/* Open DMA core protection for the duration of the job */
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	/* Busy-wait until the engine writes the fence value to host memory */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

	/* Success path deliberately falls through to restore protection */
free_fence_ptr:
	/* Close DMA core protection again (non-secured mode) */
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
6424 
/*
 * gaudi_get_event_desc - copy the printable name of an async event to a buffer
 *
 * @event_type: async event id as received from FW
 * @desc: destination buffer for the event name
 * @size: size of @desc in bytes
 *
 * Writes "N/A" when the event id is out of range or not marked valid in the
 * irq map table.
 */
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	/*
	 * Never pass the table entry directly as the format string - print it
	 * through "%s" so a '%' in the name cannot be interpreted as a
	 * conversion specifier (format-string hazard, -Wformat-security).
	 */
	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}
6440 
/*
 * gaudi_get_razwi_initiator_dma_name - resolve which DMA engine caused a RAZWI
 *
 * @hdev: pointer to hl device structure
 * @x_y: RAZWI initiator X/Y coordinates as captured in the RAZWI ID register
 * @is_write: true if the RAZWI was caused by a write transaction
 * @engine_id_1: out param for the (first) candidate engine id
 * @engine_id_2: out param for the second candidate engine id, set only when
 *               the exact engine cannot be determined
 *
 * Each DMA-IF coordinate is shared by two DMA cores, so both cores'
 * ERR_CAUSE registers are read and the HBW read/write error bit is used to
 * disambiguate. If the bit is set in neither or both, both engine ids are
 * reported.
 *
 * Return: printable name of the initiating engine(s), or "unknown initiator".
 */
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
							bool is_write, u16 *engine_id_1,
							u16 *engine_id_2)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	/* Select the ERR_CAUSE bit matching the failing transaction direction */
	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
				DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	/* Map the DMA-IF coordinate to the pair of DMA cores behind it */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	/* Read the error-cause register of both candidate DMA cores */
	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	/* Exactly one core with the error bit set identifies the engine */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			return "DMA0";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA2";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA0 or DMA2";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			return "DMA1";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA3";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA1 or DMA3";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			return "DMA4";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA6";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA4 or DMA6";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			return "DMA5";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA7";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA5 or DMA7";
		}
	}

unknown_initiator:
	return "unknown initiator";
}
6538 
/*
 * gaudi_get_razwi_initiator_name - decode the RAZWI initiator ID register
 *
 * @hdev: pointer to hl device structure
 * @is_write: true to decode the write-ID register, false for the read-ID one
 * @engine_id_1: out param for the (first) initiating engine id
 * @engine_id_2: out param for a second candidate engine id (only set by the
 *               DMA-IF path when the exact DMA core cannot be determined)
 *
 * The ID register encodes the initiator's X/Y grid coordinates plus an AXI
 * id. Coordinates shared by several units (TPC/NIC, TPC/PCI/CPU/PSOC) are
 * further disambiguated by the AXI id; PCI, CPU and PSOC have no engine id.
 *
 * Return: printable name of the initiator, or "unknown initiator".
 */
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
							u16 *engine_id_1, u16 *engine_id_2)
{
	u32 val, x_y, axi_id;

	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	/* Split the register into the X/Y coordinate and the AXI-id fields */
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
			return "TPC0";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
			return "NIC0";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
			return "TPC3";
		}
		/* PCI, CPU or PSOC does not have engine id*/
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		/* DMA-IF coordinates are shared by two cores - delegate */
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
				engine_id_1, engine_id_2);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
			return "TPC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
			return "NIC1";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
			return "NIC2";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
			return "TPC7";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
			return "NIC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
			return "NIC5";
		}
		break;
	default:
		break;
	}

	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}
6655 
/*
 * gaudi_print_and_get_razwi_info - report and acknowledge pending RAZWI events
 *
 * @hdev: pointer to hl device structure
 * @engine_id_1: out param for the (first) initiating engine id
 * @engine_id_2: out param for a second candidate engine id
 * @is_read: set to true if an illegal read was captured
 * @is_write: set to true if an illegal write was captured
 *
 * Checks the write and read RAZWI valid registers; for each pending event it
 * prints the decoded initiator name and clears the valid bit so the next
 * RAZWI can be captured.
 */
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
						u16 *engine_id_2, bool *is_read, bool *is_write)
{
	const char *initiator;

	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		initiator = gaudi_get_razwi_initiator_name(hdev, true,
						engine_id_1, engine_id_2);
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			initiator);
		/* Acknowledge so the next write RAZWI can be captured */
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
		*is_write = true;
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		initiator = gaudi_get_razwi_initiator_name(hdev, false,
						engine_id_1, engine_id_2);
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			initiator);
		/* Acknowledge so the next read RAZWI can be captured */
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
		*is_read = true;
	}
}
6676 
/*
 * gaudi_print_and_get_mmu_error_info - report captured MMU page-fault and
 * access-error information
 *
 * @hdev: pointer to hl device structure
 * @addr: out param, receives the faulting virtual address (VA bits 49:32 come
 *        from the capture register, bits 31:0 from the VA register)
 * @event_mask: mask of the last events occurred, passed to the page-fault
 *              handler
 *
 * Does nothing unless the MMU was initialized. Each capture register is
 * cleared after being reported so the next error can be latched. Only page
 * faults are forwarded to hl_handle_page_fault(); access errors are just
 * printed.
 */
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 val;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		/* Compose the 50-bit VA from the two capture registers */
		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);

		/* Clear the capture so the next page fault can be latched */
		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);

		/* Clear the capture so the next access error can be latched */
		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
6708 
6709 /*
6710  *  +-------------------+------------------------------------------------------+
6711  *  | Configuration Reg |                     Description                      |
6712  *  |      Address      |                                                      |
6713  *  +-------------------+------------------------------------------------------+
6714  *  |  0xF30 - 0xF3F    |ECC single error indication (1 bit per memory wrapper)|
6715  *  |                   |0xF30 memory wrappers 31:0 (MSB to LSB)               |
6716  *  |                   |0xF34 memory wrappers 63:32                           |
6717  *  |                   |0xF38 memory wrappers 95:64                           |
6718  *  |                   |0xF3C memory wrappers 127:96                          |
6719  *  +-------------------+------------------------------------------------------+
6720  *  |  0xF40 - 0xF4F    |ECC double error indication (1 bit per memory wrapper)|
6721  *  |                   |0xF40 memory wrappers 31:0 (MSB to LSB)               |
6722  *  |                   |0xF44 memory wrappers 63:32                           |
6723  *  |                   |0xF48 memory wrappers 95:64                           |
6724  *  |                   |0xF4C memory wrappers 127:96                          |
6725  *  +-------------------+------------------------------------------------------+
6726  */
/*
 * gaudi_extract_ecc_info - extract ECC error address and syndrome for a block
 *
 * @hdev: pointer to hl device structure
 * @params: ECC extraction parameters (block address, number of memories,
 *          derr selects double-error vs single-error registers)
 * @ecc_address: out param for the captured ECC error address
 * @ecc_syndrom: out param for the captured ECC syndrome
 * @memory_wrapper_idx: out param for the index of the failing memory wrapper
 *
 * Scans the SERR/DERR indication registers (one bit per memory wrapper, 32
 * wrappers per 32-bit register - see the table above), selects the failing
 * wrapper, reads the captured address/syndrome and finally clears the error
 * indication.
 *
 * Return: 0 on success, -EINVAL if no error bit was found.
 */
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;

	/* One indication register covers 32 memory wrappers - round up */
	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/*
	 * Iterate through memory wrappers, a single bit must be set.
	 * The indication registers are consecutive 32-bit words, so read at
	 * a fixed stride of 4 bytes per register. (The previous
	 * "err_addr += i * 4" accumulated the offset across iterations and
	 * read the wrong registers from the third iteration onwards.)
	 */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		return -EINVAL;
	}

	/* Select the failing wrapper so its address/syndrome can be read */
	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
			*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

	return 0;
}
6783 
6784 /*
6785  * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6786  *
6787  * @idx: the current pi/ci value
6788  * @q_len: the queue length (power of 2)
6789  *
6790  * @return the cyclically decremented index
6791  */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	u32 mask = q_len - 1;

	/*
	 * q_len is a power of 2, so masking with (q_len - 1) reduces modulo
	 * q_len. Unsigned wrap-around makes (idx - 1) & mask correct even
	 * for idx == 0: 0 - 1 wraps to all-ones, and the mask yields
	 * q_len - 1, which is the expected cyclic predecessor.
	 */
	return (idx - 1) & mask;
}
6803 
6804 /**
6805  * gaudi_handle_sw_config_stream_data - print SW config stream data
6806  *
6807  * @hdev: pointer to the habanalabs device structure
6808  * @stream: the QMAN's stream
6809  * @qman_base: base address of QMAN registers block
6810  * @event_mask: mask of the last events occurred
6811  */
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base, u64 event_mask)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	/* Distance between consecutive streams' CQ_PTR_LO registers */
	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	/*
	 * Compute this QMAN/stream's register addresses relative to its base;
	 * the TPC0 register map is used as the template for all QMANs.
	 */
	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
						stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
				(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
				(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	/* Recompose the 64-bit CQ pointer from its HI/LO halves */
	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
							stream, cq_ptr, size);

	/* On undefined opcode, also record the CQ info for later reporting */
	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
		hdev->captured_err_info.undef_opcode.cq_size = size;
		hdev->captured_err_info.undef_opcode.stream_id = stream;
	}
}
6838 
6839 /**
6840  * gaudi_handle_last_pqes_on_err - print last PQEs on error
6841  *
6842  * @hdev: pointer to the habanalabs device structure
6843  * @qid_base: first QID of the QMAN (out of 4 streams)
6844  * @stream: the QMAN's stream
6845  * @qman_base: base address of QMAN registers block
6846  * @event_mask: mask of the last events occurred
6847  * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6848  */
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						u64 event_mask,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	/* Locate this stream's PQ consumer-index register (TPC0 map template) */
	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
						stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
					q->int_queue_len : HL_QUEUE_LENGTH;

	/* Lock the queues so the PQ is not modified while we walk it */
	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	ci = RREG32(pq_ci);

	/* we should start printing form ci -1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
	memset(addr, 0, sizeof(addr));

	/* Walk backwards over the last PQ entries the fetcher may have cached */
	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry- break */
		if (!len)
			break;

		addr[i] = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
							stream, ci, addr[i], len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	/* On undefined opcode, also capture the CB addresses for reporting */
	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
		u32 arr_idx = undef_opcode->cb_addr_streams_len;

		/* First stream captured - stamp time and engine once */
		if (arr_idx == 0) {
			undef_opcode->timestamp = ktime_get();
			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
		}

		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
		undef_opcode->cb_addr_streams_len++;
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}
6915 
6916 /**
6917  * handle_qman_data_on_err - extract QMAN data on error
6918  *
6919  * @hdev: pointer to the habanalabs device structure
6920  * @qid_base: first QID of the QMAN (out of 4 streams)
6921  * @stream: the QMAN's stream
6922  * @qman_base: base address of QMAN registers block
6923  * @event_mask: mask of the last events occurred
6924  *
6925  * This function attempt to exatract as much data as possible on QMAN error.
6926  * On upper CP print the SW config stream data and last 8 PQEs.
6927  * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6928  */
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
				   u32 stream, u64 qman_base, u64 event_mask)
{
	u32 s;

	/*
	 * Upper CP (a real stream): print its SW config stream data together
	 * with its last PQEs and we are done.
	 */
	if (stream != QMAN_STREAMS) {
		gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, qman_base,
						event_mask, true);
		return;
	}

	/*
	 * Lower CP: print the SW config data once, then the last PQEs of
	 * every upper CP stream.
	 */
	gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	for (s = 0; s < QMAN_STREAMS; s++)
		gaudi_handle_last_pqes_on_err(hdev, qid_base, s, qman_base,
						event_mask, false);
}
6947 
/**
 * gaudi_handle_qman_err_generic - scan a QMAN's error-status registers and
 * report every active error cause
 *
 * @hdev: pointer to the habanalabs device structure
 * @qm_name: printable name of the QMAN, used in error messages
 * @qman_base: base address of the QMAN registers block
 * @qid_base: first QID of the QMAN (out of 4 streams)
 * @event_mask: in/out mask of events to notify the user about
 */
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					  const char *qm_name,
					  u64 qman_base,
					  u32 qid_base,
					  u64 *event_mask)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	/* All QMANs share the same register layout, so derive this QMAN's
	 * register addresses from the TPC0 QMAN block offsets
	 */
	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		/* Index QMAN_STREAMS is the lower-CP status register */
		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		/* Print every set error-cause bit and accumulate them for
		 * the write-1-clear below
		 */
		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}
		/* check for undefined opcode */
		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
				hdev->captured_err_info.undef_opcode.write_enable) {
			/* Capture info only for the first undefined opcode;
			 * write_enable is dropped so later errors don't
			 * overwrite the captured data
			 */
			memset(&hdev->captured_err_info.undef_opcode, 0,
						sizeof(hdev->captured_err_info.undef_opcode));

			hdev->captured_err_info.undef_opcode.write_enable = false;
			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
		}

		/* Write 1 clear errors */
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	/* Report QMAN arbiter errors, if any */
	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
7014 
/**
 * gaudi_print_sm_sei_info - print Sync Manager SEI error information
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: one of the GAUDI_EVENT_DMA_IF_SEI_0..3 events
 * @sei_data: SEI payload received from FW (error cause + SM log register)
 */
static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	/* Flip the bits as the enum is ordered in the opposite way */
	index = (index ^ 0x3) & 0x3;

	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: SOB Group %u overflow/underflow",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: AXI ID %u response error",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	default:
		/* Fix: report the unknown cause value itself (as compared in
		 * the switch above), not the SM log register contents which
		 * previously made the message misleading.
		 */
		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
				sei_data->sei_cause);
		break;
	}
}
7048 
/**
 * gaudi_handle_ecc_event - extract and print ECC error details
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: ECC event ID received from FW
 * @ecc_data: ECC details as reported by FW in the event queue entry
 *
 * Depending on the event source, the ECC address/syndrome/wrapper-index are
 * either taken directly from the FW payload or extracted from the relevant
 * H/W block registers via gaudi_extract_ecc_info().
 */
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	/* When security is enabled the driver can't read the H/W registers
	 * itself, so the FW-provided data is the only option
	 */
	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		/* params.num_memories is the number of ECC-protected memory
		 * wrappers to scan inside the block
		 */
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		/* /4 - MME event IDs appear to be spaced 4 apart per engine;
		 * NOTE(review): verify against gaudi_async_events enum
		 */
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
7145 
/**
 * gaudi_handle_qman_err - handle a QMAN error event
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: QMAN error event ID received from FW
 * @event_mask: in/out mask of events to notify the user about
 *
 * Maps the event ID to the QMAN's first queue ID, register block base address
 * and printable name, then delegates to gaudi_handle_qman_err_generic().
 * Unknown event IDs are silently ignored.
 */
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		/* Only MME0 and MME2 QMANs are handled here - presumably
		 * these are the master MME engines; verify against the MME
		 * configuration
		 */
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
}
7236 
/**
 * gaudi_print_irq_info - print info about a received H/W interrupt event
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: event ID received from FW
 * @check_razwi: whether to also query and report RAZWI and MMU error info
 * @event_mask: in/out mask of events to notify the user about
 */
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool check_razwi, u64 *event_mask)
{
	bool is_read = false, is_write = false;
	u16 engine_id[2], num_of_razwi_eng = 0;
	char desc[64] = "";
	u64 razwi_addr = 0;
	u8 razwi_flags = 0;

	/*
	 * Init engine id by default as not valid and only if razwi initiated from engine with
	 * engine id it will get valid value.
	 */
	engine_id[0] = HL_RAZWI_NA_ENG_ID;
	engine_id[1] = HL_RAZWI_NA_ENG_ID;

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (check_razwi) {
		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
						&is_write);
		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);

		if (is_read)
			razwi_flags |= HL_RAZWI_READ;
		if (is_write)
			razwi_flags |= HL_RAZWI_WRITE;

		/* Count how many valid initiator engine IDs were resolved */
		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
				num_of_razwi_eng = 2;
			else
				num_of_razwi_eng = 1;
		}

		/* Record the RAZWI only if an actual read/write violation was detected */
		if (razwi_flags)
			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
					razwi_flags, event_mask);
	}
}
7279 
/* Print FW vs. driver (LKD) CPU-queue pointers upon a queue out-of-sync error */
static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
					struct cpucp_pkt_sync_err *sync_err)
{
	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];

	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
}
7288 
/* Print details of a FW "alive" report event (FW recovered from an error) */
static void gaudi_print_fw_alive_info(struct hl_device *hdev,
					struct hl_eq_fw_alive *fw_alive)
{
	dev_err(hdev->dev,
		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
		le32_to_cpu(fw_alive->process_id),
		le32_to_cpu(fw_alive->thread_id),
		le64_to_cpu(fw_alive->uptime_seconds));
}
7299 
/* Print a human-readable description of a NIC AXI error event from FW */
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
						void *data)
{
	struct eq_nic_sei_event *sei = data;
	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
	char desc[64] = "";
	char *cause_str;

	/* Translate the AXI error cause to its printable name */
	if (sei->axi_error_cause == RXB)
		cause_str = "RXB";
	else if (sei->axi_error_cause == RXE)
		cause_str = "RXE";
	else if (sei->axi_error_cause == TXS)
		cause_str = "TXS";
	else if (sei->axi_error_cause == TXE)
		cause_str = "TXE";
	else if (sei->axi_error_cause == QPC_RESP)
		cause_str = "QPC_RESP";
	else if (sei->axi_error_cause == NON_AXI_ERR)
		cause_str = "NON_AXI_ERR";
	else if (sei->axi_error_cause == TMR)
		cause_str = "TMR";
	else {
		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
			sei->axi_error_cause);
		cause_str = "N/A";
	}

	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, cause_str,
			sei->id);
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);
}
7341 
/* Compute-reset late-init callback; always fails since GAUDI supports hard-reset only */
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	/* GAUDI doesn't support any reset except hard-reset */
	return -EPERM;
}
7347 
/**
 * gaudi_hbm_read_interrupts - read, print and clear HBM ECC/parity interrupts
 *
 * @hdev: pointer to the habanalabs device structure
 * @device: HBM device index
 * @hbm_ecc_data: ECC data reported by FW, or NULL when not available
 *
 * When the FW reports HBM ECC data (or security is enabled), only the FW
 * payload is printed. Otherwise the HBM memory-controller registers are read
 * directly, printed and cleared.
 *
 * Return: 0 on success or when only FW data is used, -EIO if any interrupt
 * or MC SRAM error indication was found in the registers.
 */
static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
			struct hl_eq_hbm_ecc_data *hbm_ecc_data)
{
	u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
	int rc = 0;

	/* FW-managed ECC: decode and print the packed FW payload */
	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
		if (!hbm_ecc_data) {
			dev_err(hdev->dev, "No FW ECC data");
			return 0;
		}

		wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
		ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
				le32_to_cpu(hbm_ecc_data->hbm_ecc_info));

		dev_err(hdev->dev,
			"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
			device, ch, wr_par, rd_par, ca_par, serr, derr);
		dev_err(hdev->dev,
			"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
			device, ch, hbm_ecc_data->first_addr, type,
			hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
			hbm_ecc_data->dec_cnt);
		return 0;
	}

	if (hdev->asic_prop.fw_security_enabled) {
		dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
		return 0;
	}

	/* Direct MC register access - each channel register pair covers two
	 * pseudo-channels. NOTE(review): the 0x060/0x06C/0x070/0x07C offsets
	 * appear to be per-pseudo-channel interrupt/ECC info registers of the
	 * HBM memory controller; confirm against the MC register spec.
	 */
	base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
	for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
		val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x060);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2,
				RREG32(base + ch * 0x1000 + 0x064),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
		val = (val & 0xFF) | ((val >> 8) & 0xFF);
		if (val) {
			rc = -EIO;
			dev_err(hdev->dev,
				"HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
				device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
				(val >> 2) & 0x1, (val >> 3) & 0x1,
				(val >> 4) & 0x1);

			val2 = RREG32(base + ch * 0x1000 + 0x070);
			dev_err(hdev->dev,
				"HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
				device, ch * 2 + 1,
				RREG32(base + ch * 0x1000 + 0x074),
				(val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
				(val2 & 0xFF0000) >> 16,
				(val2 & 0xFF000000) >> 24);
		}

		/* Clear interrupts */
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
		WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
		WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
		RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
		RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
	}

	/* MC SRAM single/double error indications */
	val  = RREG32(base + 0x8F30);
	val2 = RREG32(base + 0x8F34);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
			device, val, val2);
	}
	val  = RREG32(base + 0x8F40);
	val2 = RREG32(base + 0x8F44);
	if (val | val2) {
		rc = -EIO;
		dev_err(hdev->dev,
			"HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
			device, val, val2);
	}

	return rc;
}
7462 
/* Map an HBM SPI event ID (either SPI_0 or SPI_1) to its HBM device index */
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	if (hbm_event_type == GAUDI_EVENT_HBM0_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM0_SPI_1)
		return 0;

	if (hbm_event_type == GAUDI_EVENT_HBM1_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM1_SPI_1)
		return 1;

	if (hbm_event_type == GAUDI_EVENT_HBM2_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM2_SPI_1)
		return 2;

	if (hbm_event_type == GAUDI_EVENT_HBM3_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM3_SPI_1)
		return 3;

	/* Should never happen */
	return 0;
}
7485 
/**
 * gaudi_tpc_read_interrupts - print and clear a TPC's interrupt causes
 *
 * @hdev: pointer to the habanalabs device structure
 * @tpc_id: index of the TPC engine
 * @interrupt_name: interrupt name to include in the error messages
 *
 * Return: true if a soft-reset is required (a QM error cause was set),
 * false otherwise.
 */
static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
					char *interrupt_name)
{
	u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
	bool soft_reset_required = false;

	tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
				TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;

	for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
		if (tpc_interrupts_cause & BIT(i)) {
			dev_err_ratelimited(hdev->dev,
					"TPC%d_%s interrupt cause: %s\n",
					tpc_id, interrupt_name,
					gaudi_tpc_interrupts_cause[i]);
			/* If this is QM error, we need to soft-reset
			 * (bit 15 - presumably the QM-error entry in
			 * gaudi_tpc_interrupts_cause; verify)
			 */
			if (i == 15)
				soft_reset_required = true;
		}

	/* Clear interrupts */
	WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);

	return soft_reset_required;
}
7511 
/* Translate a TPC DEC event ID to the TPC index; DEC event IDs come in
 * pairs per TPC, hence the division by two.
 */
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	int dec_event_offset = tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC;

	return dec_event_offset >> 1;
}
7516 
/* Translate a TPC KRN_ERR event ID to the TPC index; each TPC owns a group
 * of six consecutive event IDs starting at GAUDI_EVENT_TPC0_KRN_ERR.
 */
static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	int krn_event_offset = tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR;

	return krn_event_offset / 6;
}
7521 
/**
 * gaudi_print_clk_change_info - record and print a clock-throttling event
 *
 * @hdev: pointer to the habanalabs device structure
 * @event_type: one of the FIX_POWER/FIX_THERMAL start/end events
 * @event_mask: in/out mask of events to notify the user about
 *
 * Updates the device's clock-throttling bookkeeping (current/aggregated
 * reasons and start/end timestamps) under the clk_throttling lock.
 */
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t zero_time = ktime_set(0, 0);

	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		/* Power throttling started - open a new timestamp interval */
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		/* Power throttling ended - close the interval */
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		/* Thermal throttling started - also notify user space */
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
7571 
gaudi_handle_eqe(struct hl_device * hdev,struct hl_eq_entry * eq_entry)7572 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7573 {
7574 	struct gaudi_device *gaudi = hdev->asic_specific;
7575 	struct hl_info_fw_err_info fw_err_info;
7576 	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7577 	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7578 	u32 fw_fatal_err_flag = 0, flags = 0;
7579 	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7580 			>> EQ_CTL_EVENT_TYPE_SHIFT);
7581 	bool reset_required, reset_direct = false;
7582 	u8 cause;
7583 	int rc;
7584 
7585 	if (event_type >= GAUDI_EVENT_SIZE) {
7586 		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7587 				event_type, GAUDI_EVENT_SIZE - 1);
7588 		return;
7589 	}
7590 
7591 	gaudi->events_stat[event_type]++;
7592 	gaudi->events_stat_aggregate[event_type]++;
7593 
7594 	switch (event_type) {
7595 	case GAUDI_EVENT_PCIE_CORE_DERR:
7596 	case GAUDI_EVENT_PCIE_IF_DERR:
7597 	case GAUDI_EVENT_PCIE_PHY_DERR:
7598 	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7599 	case GAUDI_EVENT_MME0_ACC_DERR:
7600 	case GAUDI_EVENT_MME0_SBAB_DERR:
7601 	case GAUDI_EVENT_MME1_ACC_DERR:
7602 	case GAUDI_EVENT_MME1_SBAB_DERR:
7603 	case GAUDI_EVENT_MME2_ACC_DERR:
7604 	case GAUDI_EVENT_MME2_SBAB_DERR:
7605 	case GAUDI_EVENT_MME3_ACC_DERR:
7606 	case GAUDI_EVENT_MME3_SBAB_DERR:
7607 	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7608 		fallthrough;
7609 	case GAUDI_EVENT_CPU_IF_ECC_DERR:
7610 	case GAUDI_EVENT_PSOC_MEM_DERR:
7611 	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7612 	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7613 	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7614 	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7615 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7616 	case GAUDI_EVENT_MMU_DERR:
7617 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7618 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7619 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7620 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7621 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7622 		goto reset_device;
7623 
7624 	case GAUDI_EVENT_GIC500:
7625 	case GAUDI_EVENT_AXI_ECC:
7626 	case GAUDI_EVENT_L2_RAM_ECC:
7627 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7628 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7629 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7630 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7631 		goto reset_device;
7632 
7633 	case GAUDI_EVENT_HBM0_SPI_0:
7634 	case GAUDI_EVENT_HBM1_SPI_0:
7635 	case GAUDI_EVENT_HBM2_SPI_0:
7636 	case GAUDI_EVENT_HBM3_SPI_0:
7637 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7638 		gaudi_hbm_read_interrupts(hdev,
7639 				gaudi_hbm_event_to_dev(event_type),
7640 				&eq_entry->hbm_ecc_data);
7641 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7642 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7643 		goto reset_device;
7644 
7645 	case GAUDI_EVENT_HBM0_SPI_1:
7646 	case GAUDI_EVENT_HBM1_SPI_1:
7647 	case GAUDI_EVENT_HBM2_SPI_1:
7648 	case GAUDI_EVENT_HBM3_SPI_1:
7649 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7650 		gaudi_hbm_read_interrupts(hdev,
7651 				gaudi_hbm_event_to_dev(event_type),
7652 				&eq_entry->hbm_ecc_data);
7653 		hl_fw_unmask_irq(hdev, event_type);
7654 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7655 		break;
7656 
7657 	case GAUDI_EVENT_TPC0_DEC:
7658 	case GAUDI_EVENT_TPC1_DEC:
7659 	case GAUDI_EVENT_TPC2_DEC:
7660 	case GAUDI_EVENT_TPC3_DEC:
7661 	case GAUDI_EVENT_TPC4_DEC:
7662 	case GAUDI_EVENT_TPC5_DEC:
7663 	case GAUDI_EVENT_TPC6_DEC:
7664 	case GAUDI_EVENT_TPC7_DEC:
7665 		/* In TPC DEC event, notify on TPC assertion. While there isn't
7666 		 * a specific event for assertion yet, the FW generates TPC DEC event.
7667 		 * The SW upper layer will inspect an internal mapped area to indicate
7668 		 * if the event is a TPC Assertion or a "real" TPC DEC.
7669 		 */
7670 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7671 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7672 		reset_required = gaudi_tpc_read_interrupts(hdev,
7673 					tpc_dec_event_to_tpc_id(event_type),
7674 					"AXI_SLV_DEC_Error");
7675 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7676 		if (reset_required) {
7677 			dev_err(hdev->dev, "reset required due to %s\n",
7678 				gaudi_irq_map_table[event_type].name);
7679 
7680 			reset_direct = true;
7681 			goto reset_device;
7682 		} else {
7683 			hl_fw_unmask_irq(hdev, event_type);
7684 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7685 		}
7686 		break;
7687 
7688 	case GAUDI_EVENT_TPC0_KRN_ERR:
7689 	case GAUDI_EVENT_TPC1_KRN_ERR:
7690 	case GAUDI_EVENT_TPC2_KRN_ERR:
7691 	case GAUDI_EVENT_TPC3_KRN_ERR:
7692 	case GAUDI_EVENT_TPC4_KRN_ERR:
7693 	case GAUDI_EVENT_TPC5_KRN_ERR:
7694 	case GAUDI_EVENT_TPC6_KRN_ERR:
7695 	case GAUDI_EVENT_TPC7_KRN_ERR:
7696 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7697 		reset_required = gaudi_tpc_read_interrupts(hdev,
7698 					tpc_krn_event_to_tpc_id(event_type),
7699 					"KRN_ERR");
7700 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7701 		if (reset_required) {
7702 			dev_err(hdev->dev, "reset required due to %s\n",
7703 				gaudi_irq_map_table[event_type].name);
7704 
7705 			reset_direct = true;
7706 			goto reset_device;
7707 		} else {
7708 			hl_fw_unmask_irq(hdev, event_type);
7709 			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7710 		}
7711 		break;
7712 
7713 	case GAUDI_EVENT_PCIE_CORE_SERR:
7714 	case GAUDI_EVENT_PCIE_IF_SERR:
7715 	case GAUDI_EVENT_PCIE_PHY_SERR:
7716 	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7717 	case GAUDI_EVENT_MME0_ACC_SERR:
7718 	case GAUDI_EVENT_MME0_SBAB_SERR:
7719 	case GAUDI_EVENT_MME1_ACC_SERR:
7720 	case GAUDI_EVENT_MME1_SBAB_SERR:
7721 	case GAUDI_EVENT_MME2_ACC_SERR:
7722 	case GAUDI_EVENT_MME2_SBAB_SERR:
7723 	case GAUDI_EVENT_MME3_ACC_SERR:
7724 	case GAUDI_EVENT_MME3_SBAB_SERR:
7725 	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7726 	case GAUDI_EVENT_CPU_IF_ECC_SERR:
7727 	case GAUDI_EVENT_PSOC_MEM_SERR:
7728 	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7729 	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7730 	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7731 	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7732 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7733 		fallthrough;
7734 	case GAUDI_EVENT_MMU_SERR:
7735 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7736 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7737 		hl_fw_unmask_irq(hdev, event_type);
7738 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7739 		break;
7740 
7741 	case GAUDI_EVENT_PCIE_DEC:
7742 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
7743 	case GAUDI_EVENT_PSOC_AXI_DEC:
7744 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
7745 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7746 		hl_fw_unmask_irq(hdev, event_type);
7747 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7748 		break;
7749 
7750 	case GAUDI_EVENT_MMU_PAGE_FAULT:
7751 	case GAUDI_EVENT_MMU_WR_PERM:
7752 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7753 		hl_fw_unmask_irq(hdev, event_type);
7754 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7755 		break;
7756 
7757 	case GAUDI_EVENT_MME0_WBC_RSP:
7758 	case GAUDI_EVENT_MME0_SBAB0_RSP:
7759 	case GAUDI_EVENT_MME1_WBC_RSP:
7760 	case GAUDI_EVENT_MME1_SBAB0_RSP:
7761 	case GAUDI_EVENT_MME2_WBC_RSP:
7762 	case GAUDI_EVENT_MME2_SBAB0_RSP:
7763 	case GAUDI_EVENT_MME3_WBC_RSP:
7764 	case GAUDI_EVENT_MME3_SBAB0_RSP:
7765 	case GAUDI_EVENT_RAZWI_OR_ADC:
7766 	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7767 	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7768 		fallthrough;
7769 	case GAUDI_EVENT_NIC0_QM0:
7770 	case GAUDI_EVENT_NIC0_QM1:
7771 	case GAUDI_EVENT_NIC1_QM0:
7772 	case GAUDI_EVENT_NIC1_QM1:
7773 	case GAUDI_EVENT_NIC2_QM0:
7774 	case GAUDI_EVENT_NIC2_QM1:
7775 	case GAUDI_EVENT_NIC3_QM0:
7776 	case GAUDI_EVENT_NIC3_QM1:
7777 	case GAUDI_EVENT_NIC4_QM0:
7778 	case GAUDI_EVENT_NIC4_QM1:
7779 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7780 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7781 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7782 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
7783 		hl_fw_unmask_irq(hdev, event_type);
7784 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7785 		break;
7786 
7787 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7788 		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7789 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7790 		goto reset_device;
7791 
7792 	case GAUDI_EVENT_TPC0_BMON_SPMU:
7793 	case GAUDI_EVENT_TPC1_BMON_SPMU:
7794 	case GAUDI_EVENT_TPC2_BMON_SPMU:
7795 	case GAUDI_EVENT_TPC3_BMON_SPMU:
7796 	case GAUDI_EVENT_TPC4_BMON_SPMU:
7797 	case GAUDI_EVENT_TPC5_BMON_SPMU:
7798 	case GAUDI_EVENT_TPC6_BMON_SPMU:
7799 	case GAUDI_EVENT_TPC7_BMON_SPMU:
7800 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7801 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7802 		hl_fw_unmask_irq(hdev, event_type);
7803 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7804 		break;
7805 
7806 	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7807 		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7808 		hl_fw_unmask_irq(hdev, event_type);
7809 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7810 		break;
7811 
7812 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7813 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7814 		gaudi_print_sm_sei_info(hdev, event_type,
7815 					&eq_entry->sm_sei_data);
7816 		rc = hl_state_dump(hdev);
7817 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7818 		if (rc)
7819 			dev_err(hdev->dev,
7820 				"Error during system state dump %d\n", rc);
7821 		hl_fw_unmask_irq(hdev, event_type);
7822 		break;
7823 
7824 	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7825 		break;
7826 
7827 	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7828 		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7829 		hl_fw_unmask_irq(hdev, event_type);
7830 		break;
7831 
7832 	case GAUDI_EVENT_PSOC_GPIO_U16_0:
7833 		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7834 		dev_err(hdev->dev,
7835 			"Received high temp H/W interrupt %d (cause %d)\n",
7836 			event_type, cause);
7837 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7838 		break;
7839 
7840 	case GAUDI_EVENT_DEV_RESET_REQ:
7841 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7842 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7843 		goto reset_device;
7844 
7845 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7846 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7847 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7848 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7849 		goto reset_device;
7850 
7851 	case GAUDI_EVENT_FW_ALIVE_S:
7852 		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7853 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7854 		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7855 		fw_err_info.event_id = event_type;
7856 		fw_err_info.event_mask = &event_mask;
7857 		hl_handle_fw_err(hdev, &fw_err_info);
7858 		goto reset_device;
7859 
7860 	default:
7861 		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7862 				event_type);
7863 		break;
7864 	}
7865 
7866 	if (event_mask)
7867 		hl_notifier_event_send_all(hdev, event_mask);
7868 
7869 	return;
7870 
7871 reset_device:
7872 	reset_required = true;
7873 
7874 	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7875 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7876 
7877 		/* notify on device unavailable while the reset triggered by fw */
7878 		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7879 					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7880 	} else if (hdev->hard_reset_on_fw_events) {
7881 		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7882 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7883 	} else {
7884 		reset_required = false;
7885 	}
7886 
7887 	if (reset_required) {
7888 		/* escalate general hw errors to critical/fatal error */
7889 		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7890 			hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7891 
7892 		hl_device_cond_reset(hdev, flags, event_mask);
7893 	} else {
7894 		hl_fw_unmask_irq(hdev, event_type);
7895 		/* Notification on occurred event needs to be sent although reset is not executed */
7896 		if (event_mask)
7897 			hl_notifier_event_send_all(hdev, event_mask);
7898 	}
7899 }
7900 
/*
 * gaudi_get_events_stat() - return a pointer to one of the driver's event
 * counter arrays.
 * @hdev: habanalabs device structure.
 * @aggregate: true to get the counters aggregated across the device lifetime,
 *             false to get the counters of the current run only.
 * @size: outputs the size in bytes of the returned array.
 *
 * Return: pointer to the requested counters array (owned by the driver,
 * must not be freed by the caller).
 */
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	void *stat_arr;

	if (aggregate) {
		stat_arr = gaudi->events_stat_aggregate;
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
	} else {
		stat_arr = gaudi->events_stat;
		*size = (u32) sizeof(gaudi->events_stat);
	}

	return stat_arr;
}
7913 
/*
 * gaudi_mmu_invalidate_cache() - invalidate the device MMU STLB cache.
 * @hdev: habanalabs device structure.
 * @is_hard: ignored - Gaudi always performs a full L0 & L1 invalidation.
 * @flags: ignored - there is no partial/range invalidation on Gaudi.
 *
 * Return: 0 on success (or when there is nothing to do), negative errno if
 * polling for invalidation completion timed out.
 */
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	/* Nothing to invalidate if the MMU was never enabled; also skip while
	 * a hard reset is pending, as the reset re-initializes the MMU anyway.
	 */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	/* Palladium (pldm) emulation is much slower than real silicon */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	/* wait until the STLB reports the invalidation has completed */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}
7946 
/*
 * gaudi_mmu_invalidate_cache_range() - range variant of the MMU cache
 * invalidation ASIC callback.
 * @hdev: habanalabs device structure.
 * @is_hard: forwarded to the full-cache invalidation.
 * @flags: forwarded to the full-cache invalidation.
 * @asid: unused - no per-ASID invalidation on Gaudi.
 * @va: unused - no per-range invalidation on Gaudi.
 * @size: unused.
 *
 * Return: result of the full-cache invalidation.
 */
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
7956 
/*
 * gaudi_mmu_update_asid_hop0_addr() - program the physical address of the
 * hop-0 page table for a given ASID into the MMU.
 * @hdev: habanalabs device structure.
 * @asid: address-space ID whose hop-0 entry is being configured.
 * @phys_addr: physical address of the hop-0 page table.
 *
 * Return: 0 on success, negative errno if the MMU did not acknowledge the
 * update within the timeout.
 */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	/* Palladium (pldm) emulation needs a much longer timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* the hop-0 PA is split between two registers; writing MMU_BUSY with
	 * bit 31 set kicks off the H/W update of the ASID entry
	 */
	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	WREG32(MMU_BUSY, 0x80000000);

	/* wait for the busy bit to clear */
	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
7988 
gaudi_send_heartbeat(struct hl_device * hdev)7989 static int gaudi_send_heartbeat(struct hl_device *hdev)
7990 {
7991 	struct gaudi_device *gaudi = hdev->asic_specific;
7992 
7993 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7994 		return 0;
7995 
7996 	return hl_fw_send_heartbeat(hdev);
7997 }
7998 
gaudi_cpucp_info_get(struct hl_device * hdev)7999 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8000 {
8001 	struct gaudi_device *gaudi = hdev->asic_specific;
8002 	struct asic_fixed_properties *prop = &hdev->asic_prop;
8003 	int rc;
8004 
8005 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8006 		return 0;
8007 
8008 	rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8009 					mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8010 					mmCPU_BOOT_ERR1);
8011 	if (rc)
8012 		return rc;
8013 
8014 	if (!strlen(prop->cpucp_info.card_name))
8015 		strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8016 				CARD_NAME_MAX_LEN);
8017 
8018 	hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8019 
8020 	set_default_power_values(hdev);
8021 
8022 	return 0;
8023 }
8024 
/*
 * gaudi_is_device_idle() - check whether all compute/DMA/NIC engines are idle.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; bits of busy engines (GAUDI_ENGINE_ID_*) are set.
 * @mask_len: length of @mask_arr in u64 words (not dereferenced here).
 * @e: optional engines_data buffer; when given, a human-readable idle table
 *     is appended to it.
 *
 * Return: true only if every checked engine (DMA, TPC, MME, initialized NICs)
 * reports idle.
 */
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA  is_idle  QM_GLBL_STS0  QM_CGM_STS  DMA_CORE_STS0\n"
			"---  -------  ------------  ----------  -------------\n");

	/* DMA engines: idle means both the QMAN and the DMA core are idle */
	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC  is_idle  QM_GLBL_STS0  QM_CGM_STS  CFG_STATUS\n"
			"---  -------  ------------  ----------  ----------\n");

	/* TPC engines: idle means both the QMAN and the TPC core are idle */
	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME  is_idle  QM_GLBL_STS0  QM_CGM_STS  ARCH_STATUS\n"
			"---  -------  ------------  ----------  -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			/* slaves have no QMAN of their own - print dashes */
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e,
				"\nNIC  is_idle  QM_GLBL_STS0  QM_CGM_STS\n"
				"---  -------  ------------  ----------\n");

	/* NICs come in macros of two ports; each port is checked only if its
	 * H/W capability bit was set during init
	 */
	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}
8161 
/* Acquire the per-device H/W queues spinlock (sparse-annotated) */
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}
8169 
/* Release the per-device H/W queues spinlock (sparse-annotated) */
static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
8177 
/* Return the PCI device ID of the underlying PCI device */
static u32 gaudi_get_pci_id(struct hl_device *hdev)
{
	return hdev->pdev->device;
}
8182 
/*
 * gaudi_get_eeprom_data() - read the on-board EEPROM contents via the F/W.
 * @hdev: habanalabs device structure.
 * @data: destination buffer.
 * @max_size: capacity of @data in bytes.
 *
 * Return: 0 on success or when the CPU queue is not initialized (EEPROM can
 * only be fetched through the F/W), error code otherwise.
 */
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return hl_fw_get_eeprom_data(hdev, data, max_size);

	return 0;
}
8193 
gaudi_get_monitor_dump(struct hl_device * hdev,void * data)8194 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8195 {
8196 	struct gaudi_device *gaudi = hdev->asic_specific;
8197 
8198 	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8199 		return 0;
8200 
8201 	return hl_fw_get_monitor_dump(hdev, data);
8202 }
8203 
/*
 * gaudi_run_tpc_kernel() - load and execute a kernel on a single TPC engine.
 * @hdev: habanalabs device structure.
 * @tpc_kernel: device address of the kernel binary to run.
 * @tpc_id: index of the target TPC engine.
 *
 * The flow is: program the kernel/icache/LUT base addresses, invalidate and
 * prefetch the icache, then trigger execution and wait for the vector pipe
 * and work-queue counters to drain.
 *
 * This function should be used only during initialization and/or after reset,
 * when there are no active users.
 *
 * Return: 0 on success, -EIO if any of the wait stages timed out.
 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,	u32 tpc_id)
{
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	/* per-TPC register block stride */
	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	/* Palladium (pldm) emulation needs a much longer timeout */
	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	/* invalidate the icache and prefetch the kernel into it */
	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		return -EIO;
	}

	/* kick off the actual kernel execution */
	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
			1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		return -EIO;
	}

	/* finally, wait for the in-flight work-queue counter to drop to zero */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
8303 
/*
 * gaudi_internal_cb_pool_init() - set up the per-context internal CB pool
 * used for collective-wait command buffers.
 * @hdev: habanalabs device structure.
 * @ctx: context the pool is mapped into.
 *
 * Allocates a DMA-coherent region, wraps it in a genalloc pool whose minimum
 * allocation order fits one collective CB, reserves a host VA block and maps
 * the region through the device MMU.
 *
 * Return: 0 on success or if the MMU is not initialized, negative errno on
 * failure (all partially acquired resources are released).
 */
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
							HOST_SPACE_INTERNAL_CB_SZ,
							&hdev->internal_cb_pool_dma_addr,
							GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	/* a collective CB is 5 MSG_SHORT packets plus one FENCE packet;
	 * size the pool's minimum allocation order accordingly
	 */
	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	/* map the pool into the context's address space and make the mapping
	 * visible to the device by invalidating the MMU cache
	 */
	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
8383 
/*
 * gaudi_internal_cb_pool_fini() - tear down the per-context internal CB pool.
 * @hdev: habanalabs device structure.
 * @ctx: context the pool was mapped into.
 *
 * Reverses gaudi_internal_cb_pool_init(): unmaps the region, releases the VA
 * block, invalidates the MMU cache, then destroys the pool and frees the
 * DMA-coherent memory. No-op if the MMU was never initialized.
 */
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
		struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
8405 
gaudi_ctx_init(struct hl_ctx * ctx)8406 static int gaudi_ctx_init(struct hl_ctx *ctx)
8407 {
8408 	int rc;
8409 
8410 	if (ctx->asid == HL_KERNEL_ASID_ID)
8411 		return 0;
8412 
8413 	rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8414 	if (rc)
8415 		return rc;
8416 
8417 	rc = gaudi_restore_user_registers(ctx->hdev);
8418 	if (rc)
8419 		gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8420 
8421 	return rc;
8422 }
8423 
gaudi_ctx_fini(struct hl_ctx * ctx)8424 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8425 {
8426 	if (ctx->asid == HL_KERNEL_ASID_ID)
8427 		return;
8428 
8429 	gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8430 }
8431 
/* No pre-scheduling work is needed for a CS on Gaudi - always succeeds */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8436 
/* Map a completion-queue index to its assigned H/W queue ID */
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
8441 
gaudi_get_signal_cb_size(struct hl_device * hdev)8442 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8443 {
8444 	return sizeof(struct packet_msg_short) +
8445 			sizeof(struct packet_msg_prot) * 2;
8446 }
8447 
gaudi_get_wait_cb_size(struct hl_device * hdev)8448 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8449 {
8450 	return sizeof(struct packet_msg_short) * 4 +
8451 			sizeof(struct packet_fence) +
8452 			sizeof(struct packet_msg_prot) * 2;
8453 }
8454 
/* Return the register offset of a sync object in the west-south sync
 * manager; each SOB register is 4 bytes wide.
 */
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
8459 
/*
 * gaudi_gen_signal_cb() - append a signal packet to a command buffer.
 * @hdev: habanalabs device structure.
 * @data: the hl_cb to write into (passed as void * per the ASIC callback).
 * @sob_id: sync object to signal.
 * @size: current used size of the CB; the packet is written at this offset.
 * @eb: whether to set the engine-barrier bit in the packet.
 *
 * Builds a single MSG_SHORT packet that increments the given SOB by 1
 * (ADD mode).
 *
 * Return: the new used size of the CB (old size + packet size).
 */
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
8487 
/*
 * gaudi_add_mon_msg_short() - build one monitor-configuration MSG_SHORT
 * packet.
 * @pkt: destination packet buffer.
 * @value: payload value to write to the monitor register.
 * @addr: register offset (relative to the W_S monitor base).
 *
 * Return: the size in bytes of the packet written.
 */
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
					u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8507 
/*
 * gaudi_add_arm_monitor_pkt() - build the MSG_SHORT packet that arms a sync
 * manager monitor on a group of sync objects.
 * @hdev: habanalabs device structure.
 * @pkt: destination packet buffer.
 * @sob_base: first sync object in the monitored group.
 * @sob_mask: bitmask of SOBs within the group to monitor.
 * @sob_val: value the monitor waits for (GREATER OR EQUAL comparison).
 * @mon_id: monitor to arm.
 *
 * Return: the size in bytes of the packet written, or 0 if the sob_base/mask
 * combination is invalid.
 */
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8556 
/*
 * gaudi_add_fence_pkt() - build a FENCE packet that waits on fence ID 2.
 * @pkt: destination packet buffer.
 *
 * The queue will stall on this packet until the monitor payload write brings
 * the fence counter to the target value (1), then decrement it by 1.
 *
 * Return: the size in bytes of the packet written.
 */
static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
{
	u32 ctl, cfg, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
	cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->cfg = cpu_to_le32(cfg);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8577 
/*
 * gaudi_get_fence_addr() - get the CFG-space address of the FENCE2 read-data
 * register of a queue's CP.
 * @hdev: habanalabs device structure.
 * @queue_id: the H/W queue (GAUDI_QUEUE_ID_*).
 * @addr: outputs the absolute address (CFG_BASE + register offset).
 *
 * Only the queues that participate in signal/wait flows are supported:
 * DMA 0/1/5, TPC 7 and the NIC queues.
 *
 * Return: 0 on success, -EINVAL for an unsupported queue.
 */
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	/* NIC queues: compute the register from the NIC macro (two engines
	 * per macro) and the engine's position within the macro
	 */
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
8699 
/*
 * gaudi_add_mon_pkts() - build the three monitor-setup packets of a wait CB.
 * @buf: destination buffer the packets are appended to.
 * @mon_id: monitor to configure.
 * @fence_addr: address the monitor writes its payload to when triggered.
 *
 * Writes three MSG_SHORT packets: payload address low, payload address high
 * and the payload data (1, which satisfies the subsequent FENCE packet).
 *
 * Return: total size in bytes of the packets written.
 */
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
8740 
gaudi_gen_wait_cb(struct hl_device * hdev,struct hl_gen_wait_properties * prop)8741 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8742 				struct hl_gen_wait_properties *prop)
8743 {
8744 	struct hl_cb *cb = (struct hl_cb *) prop->data;
8745 	void *buf = cb->kernel_address;
8746 	u64 fence_addr = 0;
8747 	u32 size = prop->size;
8748 
8749 	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8750 		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8751 				prop->q_idx);
8752 		return 0;
8753 	}
8754 
8755 	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8756 	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8757 			prop->sob_mask, prop->sob_val, prop->mon_id);
8758 	size += gaudi_add_fence_pkt(buf + size);
8759 
8760 	return size;
8761 }
8762 
gaudi_reset_sob(struct hl_device * hdev,void * data)8763 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8764 {
8765 	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8766 
8767 	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8768 		hw_sob->sob_id);
8769 
8770 	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8771 			hw_sob->sob_id * 4, 0);
8772 
8773 	kref_init(&hw_sob->kref);
8774 }
8775 
gaudi_get_device_time(struct hl_device * hdev)8776 static u64 gaudi_get_device_time(struct hl_device *hdev)
8777 {
8778 	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8779 
8780 	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8781 }
8782 
/*
 * gaudi_get_hw_block_id() - resolve a HW block id from its address.
 *
 * User-space HW-block mapping is not supported on GAUDI, so this ASIC
 * callback unconditionally rejects the request.
 *
 * Return: -EPERM always.
 */
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}
8788 
/*
 * gaudi_block_mmap() - mmap a HW block to user-space.
 *
 * User-space HW-block mapping is not supported on GAUDI, so this ASIC
 * callback unconditionally rejects the request.
 *
 * Return: -EPERM always.
 */
static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
8795 
gaudi_enable_events_from_fw(struct hl_device * hdev)8796 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8797 {
8798 	struct cpu_dyn_regs *dyn_regs =
8799 			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8800 	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8801 			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8802 			le32_to_cpu(dyn_regs->gic_host_ints_irq);
8803 
8804 	WREG32(irq_handler_offset,
8805 		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8806 }
8807 
/*
 * gaudi_ack_mmu_page_fault_or_access_error() - ack MMU error capture regs.
 *
 * Acknowledging MMU page fault / access error capture is not supported on
 * GAUDI.
 *
 * Return: -EINVAL always.
 */
static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}
8812 
/*
 * gaudi_map_pll_idx_to_fw_idx() - translate a UAPI PLL index to the
 * firmware's PLL numbering.
 * @pll_idx: HL_GAUDI_*_PLL index coming from the ioctl interface.
 *
 * Return: the firmware PLL index, or -EINVAL for an unknown index.
 */
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL:
		return CPU_PLL;
	case HL_GAUDI_PCI_PLL:
		return PCI_PLL;
	case HL_GAUDI_NIC_PLL:
		return NIC_PLL;
	case HL_GAUDI_DMA_PLL:
		return DMA_PLL;
	case HL_GAUDI_MESH_PLL:
		return MESH_PLL;
	case HL_GAUDI_MME_PLL:
		return MME_PLL;
	case HL_GAUDI_TPC_PLL:
		return TPC_PLL;
	case HL_GAUDI_IF_PLL:
		return IF_PLL;
	case HL_GAUDI_SRAM_PLL:
		return SRAM_PLL;
	case HL_GAUDI_HBM_PLL:
		return HBM_PLL;
	default:
		return -EINVAL;
	}
}
8829 
/*
 * gaudi_add_sync_to_engine_map_entry() - add one sync-object -> engine
 * mapping to the state-dump hash map.
 * @map: hash map receiving the new entry.
 * @reg_value: raw register content holding a partial sync object address.
 * @engine_type: type of the engine that owns the sync object.
 * @engine_id: index of the engine that owns the sync object.
 *
 * Return: 0 on success (including when @reg_value is filtered out as
 * unprogrammed), -ENOMEM on allocation failure.
 */
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc_obj(*entry);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	/* keyed by the partial sync object address computed above */
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
8855 
/*
 * gaudi_gen_sync_to_engine_map() - build the sync-object -> engine map used
 * by the state dump.
 * @hdev: habanalabs device structure.
 * @map: hash map to populate.
 *
 * Reads each engine's configured sync-object register (TPC, MME sub-engines
 * and DMA) and records which engine owns it.
 *
 * Return: 0 on success, negative errno on failure; on failure the partially
 * built map is freed before returning.
 */
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines and their sub-engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			/* engine id is flattened: (mme index, sub index) */
			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

	/* unwind: drop every entry added so far */
free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}
8908 
gaudi_monitor_valid(struct hl_mon_state_dump * mon)8909 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8910 {
8911 	return FIELD_GET(
8912 		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8913 		mon->status);
8914 }
8915 
gaudi_fill_sobs_from_mon(char * sobs,struct hl_mon_state_dump * mon)8916 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8917 {
8918 	const size_t max_write = 10;
8919 	u32 gid, mask, sob;
8920 	int i, offset;
8921 
8922 	/* Sync object ID is calculated as follows:
8923 	 * (8 * group_id + cleared bits in mask)
8924 	 */
8925 	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8926 			mon->arm_data);
8927 	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8928 			mon->arm_data);
8929 
8930 	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8931 		max_write; mask >>= 1, i++) {
8932 		if (!(mask & 1)) {
8933 			sob = gid * MONITOR_MAX_SOBS + i;
8934 
8935 			if (offset > 0)
8936 				offset += snprintf(sobs + offset, max_write,
8937 							", ");
8938 
8939 			offset += snprintf(sobs + offset, max_write, "%u", sob);
8940 		}
8941 	}
8942 }
8943 
/*
 * gaudi_print_single_monitor() - append a human-readable description of one
 * armed monitor to a resizable text buffer.
 * @buf: pointer to the (re)allocatable output string.
 * @size: current allocated size of @buf.
 * @offset: current write offset inside @buf.
 * @hdev: habanalabs device structure.
 * @mon: captured monitor state to describe.
 *
 * Return: whatever hl_snprintf_resize() returns (0 on success).
 */
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	/* scratch space for the binary renderings of mask and pending bits */
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	/* fall back to an empty suffix when the monitor has no known name */
	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	/* decode the ARM data fields (SID/MASK/SOD) and status PENDING bits */
	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
8981 
8982 
/*
 * gaudi_print_fences_single_engine() - dump the in-progress fence state of
 * one engine's QMAN into a resizable text buffer.
 * @hdev: habanalabs device structure.
 * @base_offset: offset of the engine's first CP_FENCE*_CNT register.
 * @status_base_offset: offset of the engine's first CP_STS register.
 * @engine_type: engine type, used for pretty-printing only.
 * @engine_id: engine index, used for pretty-printing only.
 * @buf: pointer to the (re)allocatable output string.
 * @size: current allocated size of @buf.
 * @offset: current write offset inside @buf.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, or the error
 * returned by hl_snprintf_resize().
 */
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	/* one CP status word per queue (stream) */
	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	/* fence counters: one per (fence id, queue) pair */
	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	/*
	 * Bound this loop by the number of queues: statuses[] holds one
	 * entry per queue, so iterating SP_ENGINE_NUM_OF_FENCES here (as the
	 * code previously did) would overrun the allocation if the fence
	 * count ever exceeded the queue count.
	 */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		/* skip queues with no fence wait in progress */
		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		/* NOTE(review): the printed value is fences[fence_id] while
		 * the printed address corresponds to entry
		 * (i + fence_id * num_queues); this looks inconsistent -
		 * confirm the intended indexing before changing it.
		 */
		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}
9051 
9052 
/* State-dump callbacks registered in gaudi_state_dump_init() */
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
9059 
gaudi_state_dump_init(struct hl_device * hdev)9060 static void gaudi_state_dump_init(struct hl_device *hdev)
9061 {
9062 	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9063 	int i;
9064 
9065 	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9066 		hash_add(sds->so_id_to_str_tb,
9067 			&gaudi_so_id_to_str[i].node,
9068 			gaudi_so_id_to_str[i].id);
9069 
9070 	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9071 		hash_add(sds->monitor_id_to_str_tb,
9072 			&gaudi_monitor_id_to_str[i].node,
9073 			gaudi_monitor_id_to_str[i].id);
9074 
9075 	sds->props = gaudi_state_dump_specs_props;
9076 
9077 	sds->sync_namager_names = gaudi_sync_manager_names;
9078 
9079 	sds->funcs = gaudi_state_dump_funcs;
9080 }
9081 
/* Return the GAUDI array of stream-master queue IDs */
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}
9086 
/* No dynamic DRAM properties on GAUDI - nothing to do, always succeeds */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9091 
/* No engine binning on GAUDI - nothing to do, always succeeds */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9096 
/* RAZWI polling is not implemented for GAUDI - intentionally empty */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9100 
infineon_ver_show(struct device * dev,struct device_attribute * attr,char * buf)9101 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9102 {
9103 	struct hl_device *hdev = dev_get_drvdata(dev);
9104 	struct cpucp_info *cpucp_info;
9105 
9106 	cpucp_info = &hdev->asic_prop.cpucp_info;
9107 
9108 	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9109 }
9110 
/* Read-only sysfs attribute backed by infineon_ver_show() */
static DEVICE_ATTR_RO(infineon_ver);

/* NULL-terminated attribute list exposed via the VRM attribute group */
static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};
9117 
gaudi_add_device_attr(struct hl_device * hdev,struct attribute_group * dev_clk_attr_grp,struct attribute_group * dev_vrm_attr_grp)9118 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9119 					struct attribute_group *dev_vrm_attr_grp)
9120 {
9121 	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9122 	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9123 }
9124 
/* Device-activity notification is not needed on GAUDI - always succeeds */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9129 
/*
 * GAUDI ASIC dispatch table, installed by gaudi_set_asic_funcs().
 * NULL entries are features the common code treats as unsupported on this
 * ASIC (e.g. MMU prefetch, decoder base address).
 */
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};
9226 
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 * Installs the GAUDI-specific dispatch table so the common habanalabs core
 * calls into this ASIC's implementation.
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
9237