1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
39 * QMAN DMA channels 0,1 (PCI DMAN):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is always not
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69
70 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
71
72 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
76
77 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
86
87 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
88
89 #define GAUDI_MAX_STRING_LEN 20
90
91 #define GAUDI_CB_POOL_CB_CNT 512
92 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
93
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
95
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
97
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
99
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
101
102 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
103
104 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
105
106 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
107
108 #define MONITOR_SOB_STRING_SIZE 256
109
110 static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
111 GAUDI_QUEUE_ID_DMA_0_0,
112 GAUDI_QUEUE_ID_DMA_0_1,
113 GAUDI_QUEUE_ID_DMA_0_2,
114 GAUDI_QUEUE_ID_DMA_0_3,
115 GAUDI_QUEUE_ID_DMA_1_0,
116 GAUDI_QUEUE_ID_DMA_1_1,
117 GAUDI_QUEUE_ID_DMA_1_2,
118 GAUDI_QUEUE_ID_DMA_1_3
119 };
120
121 static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
122 [GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
123 [GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
124 [GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
125 [GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
126 [GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
127 [GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
128 [GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
129 [GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
130 };
131
132 static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
133 [0] = GAUDI_QUEUE_ID_DMA_0_0,
134 [1] = GAUDI_QUEUE_ID_DMA_0_1,
135 [2] = GAUDI_QUEUE_ID_DMA_0_2,
136 [3] = GAUDI_QUEUE_ID_DMA_0_3,
137 [4] = GAUDI_QUEUE_ID_DMA_1_0,
138 [5] = GAUDI_QUEUE_ID_DMA_1_1,
139 [6] = GAUDI_QUEUE_ID_DMA_1_2,
140 [7] = GAUDI_QUEUE_ID_DMA_1_3,
141 };
142
143 static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
144 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
145 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
146 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
147 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
148 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
149 [PACKET_REPEAT] = sizeof(struct packet_repeat),
150 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
151 [PACKET_FENCE] = sizeof(struct packet_fence),
152 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
153 [PACKET_NOP] = sizeof(struct packet_nop),
154 [PACKET_STOP] = sizeof(struct packet_stop),
155 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
156 [PACKET_WAIT] = sizeof(struct packet_wait),
157 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
158 };
159
validate_packet_id(enum packet_id id)160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 switch (id) {
163 case PACKET_WREG_32:
164 case PACKET_WREG_BULK:
165 case PACKET_MSG_LONG:
166 case PACKET_MSG_SHORT:
167 case PACKET_CP_DMA:
168 case PACKET_REPEAT:
169 case PACKET_MSG_PROT:
170 case PACKET_FENCE:
171 case PACKET_LIN_DMA:
172 case PACKET_NOP:
173 case PACKET_STOP:
174 case PACKET_ARB_POINT:
175 case PACKET_WAIT:
176 case PACKET_LOAD_AND_EXE:
177 return true;
178 default:
179 return false;
180 }
181 }
182
183 static const char * const
184 gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
185 "tpc_address_exceed_slm",
186 "tpc_div_by_0",
187 "tpc_spu_mac_overflow",
188 "tpc_spu_addsub_overflow",
189 "tpc_spu_abs_overflow",
190 "tpc_spu_fp_dst_nan_inf",
191 "tpc_spu_fp_dst_denorm",
192 "tpc_vpu_mac_overflow",
193 "tpc_vpu_addsub_overflow",
194 "tpc_vpu_abs_overflow",
195 "tpc_vpu_fp_dst_nan_inf",
196 "tpc_vpu_fp_dst_denorm",
197 "tpc_assertions",
198 "tpc_illegal_instruction",
199 "tpc_pc_wrap_around",
200 "tpc_qm_sw_err",
201 "tpc_hbw_rresp_err",
202 "tpc_hbw_bresp_err",
203 "tpc_lbw_rresp_err",
204 "tpc_lbw_bresp_err"
205 };
206
207 static const char * const
208 gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
209 "PQ AXI HBW error",
210 "CQ AXI HBW error",
211 "CP AXI HBW error",
212 "CP error due to undefined OPCODE",
213 "CP encountered STOP OPCODE",
214 "CP AXI LBW error",
215 "CP WRREG32 or WRBULK returned error",
216 "N/A",
217 "FENCE 0 inc over max value and clipped",
218 "FENCE 1 inc over max value and clipped",
219 "FENCE 2 inc over max value and clipped",
220 "FENCE 3 inc over max value and clipped",
221 "FENCE 0 dec under min value and clipped",
222 "FENCE 1 dec under min value and clipped",
223 "FENCE 2 dec under min value and clipped",
224 "FENCE 3 dec under min value and clipped"
225 };
226
227 static const char * const
228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
229 "Choice push while full error",
230 "Choice Q watchdog error",
231 "MSG AXI LBW returned with error"
232 };
233
234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
243 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
296 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
348 };
349
350 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
351 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
352 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
353 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
354 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
355 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
356 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
357 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
358 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
359 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
360 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
361 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
362 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
363 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
364 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
365 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
366 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
367 { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
368 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
369 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
370 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
371 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
372 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
373 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
374 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
375 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
376 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
377 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
378 };
379
380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
381 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
382 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
383 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
384 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
385 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
386 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
387 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
388 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
389 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
390 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
391 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
392 };
393
394 static s64 gaudi_state_dump_specs_props[] = {
395 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
396 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
397 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
398 [SP_MON_OBJ_WR_ADDR_LOW] =
399 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
400 [SP_MON_OBJ_WR_ADDR_HIGH] =
401 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
402 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
403 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
404 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
405 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
406 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
407 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
408 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
409 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
410 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
411 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
412 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
413 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
414 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
415 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
416 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
417 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
418 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
419 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
420 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
421 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
422 [SP_FENCE0_CNT_OFFSET] =
423 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
424 [SP_FENCE0_RDATA_OFFSET] =
425 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
426 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
427 [SP_NUM_CORES] = 1,
428 };
429
430 static const int gaudi_queue_id_to_engine_id[] = {
431 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
432 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
433 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
434 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
435 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
436 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
437 [GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
438 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
439 [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
440 [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
441 [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
442 [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
443 [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
444 [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
445 [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
446 [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
447 [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
448 [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
449 [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
450 [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
451 [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
452 [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
453 [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
454 [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
455 [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
456 [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
457 [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
458 [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
459 [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
460 };
461
462 /* The order here is opposite to the order of the indexing in the h/w.
463 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
464 */
465 static const char * const gaudi_sync_manager_names[] = {
466 "SYNC_MGR_E_N",
467 "SYNC_MGR_W_N",
468 "SYNC_MGR_E_S",
469 "SYNC_MGR_W_S",
470 NULL
471 };
472
473 struct ecc_info_extract_params {
474 u64 block_address;
475 u32 num_memories;
476 bool derr;
477 };
478
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480 u64 phys_addr);
481 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482 struct hl_cs_job *job);
483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484 u32 size, u64 val);
485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486 u32 num_regs, u32 val);
487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488 u32 tpc_id);
489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490 static int gaudi_cpucp_info_get(struct hl_device *hdev);
491 static void gaudi_disable_clock_gating(struct hl_device *hdev);
492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494 u32 size, bool eb);
495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496 struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
get_collective_mode(struct hl_device * hdev,u32 queue_id)498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 return HL_COLLECTIVE_MASTER;
502
503 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 return HL_COLLECTIVE_SLAVE;
506
507 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 return HL_COLLECTIVE_SLAVE;
510
511 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 return HL_COLLECTIVE_SLAVE;
514
515 return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517
set_default_power_values(struct hl_device * hdev)518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 struct asic_fixed_properties *prop = &hdev->asic_prop;
521
522 if (hdev->card_type == cpucp_card_type_pmc) {
523 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524
525 if (prop->fw_security_enabled)
526 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 else
528 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 } else {
530 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 }
533 }
534
gaudi_set_fixed_properties(struct hl_device * hdev)535 static int gaudi_set_fixed_properties(struct hl_device *hdev)
536 {
537 struct asic_fixed_properties *prop = &hdev->asic_prop;
538 u32 num_sync_stream_queues = 0;
539 int i;
540
541 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542 prop->hw_queues_props = kcalloc(prop->max_queues,
543 sizeof(struct hw_queue_properties),
544 GFP_KERNEL);
545
546 if (!prop->hw_queues_props)
547 return -ENOMEM;
548
549 for (i = 0 ; i < prop->max_queues ; i++) {
550 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
551 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
552 prop->hw_queues_props[i].driver_only = 0;
553 prop->hw_queues_props[i].supports_sync_stream = 1;
554 prop->hw_queues_props[i].cb_alloc_flags =
555 CB_ALLOC_KERNEL;
556 num_sync_stream_queues++;
557 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
558 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
559 prop->hw_queues_props[i].driver_only = 1;
560 prop->hw_queues_props[i].supports_sync_stream = 0;
561 prop->hw_queues_props[i].cb_alloc_flags =
562 CB_ALLOC_KERNEL;
563 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
564 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
565 prop->hw_queues_props[i].driver_only = 0;
566 prop->hw_queues_props[i].supports_sync_stream = 0;
567 prop->hw_queues_props[i].cb_alloc_flags =
568 CB_ALLOC_USER;
569
570 }
571 prop->hw_queues_props[i].collective_mode =
572 get_collective_mode(hdev, i);
573 }
574
575 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
576 prop->cfg_base_address = CFG_BASE;
577 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
578 prop->host_base_address = HOST_PHYS_BASE;
579 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
580 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
581 prop->completion_mode = HL_COMPLETION_MODE_JOB;
582 prop->collective_first_sob = 0;
583 prop->collective_first_mon = 0;
584
585 /* 2 SOBs per internal queue stream are reserved for collective */
586 prop->sync_stream_first_sob =
587 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
588 * QMAN_STREAMS * HL_RSVD_SOBS;
589
590 /* 1 monitor per internal queue stream are reserved for collective
591 * 2 monitors per external queue stream are reserved for collective
592 */
593 prop->sync_stream_first_mon =
594 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
595 (NUMBER_OF_EXT_HW_QUEUES * 2);
596
597 prop->dram_base_address = DRAM_PHYS_BASE;
598 prop->dram_size = GAUDI_HBM_SIZE_32GB;
599 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
600 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
601
602 prop->sram_base_address = SRAM_BASE_ADDR;
603 prop->sram_size = SRAM_SIZE;
604 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
605 prop->sram_user_base_address =
606 prop->sram_base_address + SRAM_USER_BASE_OFFSET;
607
608 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
609 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
610
611 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
612 if (hdev->pldm)
613 prop->mmu_pgt_size = 0x800000; /* 8MB */
614 else
615 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
616 prop->mmu_pte_size = HL_PTE_SIZE;
617 prop->dram_page_size = PAGE_SIZE_2MB;
618 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
619 prop->dram_supports_virtual_memory = false;
620
621 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
622 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
623 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
624 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
625 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
626 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
627 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
628 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
629 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
630 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
631 prop->pmmu.start_addr = VA_HOST_SPACE_START;
632 prop->pmmu.end_addr =
633 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
634 prop->pmmu.page_size = PAGE_SIZE_4KB;
635 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
636 prop->pmmu.last_mask = LAST_MASK;
637 /* TODO: will be duplicated until implementing per-MMU props */
638 prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
639 prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
640
641 /* PMMU and HPMMU are the same except of page size */
642 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
643 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
644
645 /* shifts and masks are the same in PMMU and DMMU */
646 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
647 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
648 prop->dmmu.end_addr = VA_HOST_SPACE_END;
649 prop->dmmu.page_size = PAGE_SIZE_2MB;
650 prop->dmmu.pgt_size = prop->mmu_pgt_size;
651
652 prop->cfg_size = CFG_SIZE;
653 prop->max_asid = MAX_ASID;
654 prop->num_of_events = GAUDI_EVENT_SIZE;
655 prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
656 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
657
658 set_default_power_values(hdev);
659
660 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
661 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
662
663 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
664 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
665
666 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
667 CARD_NAME_MAX_LEN);
668
669 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
670
671 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
672 prop->sync_stream_first_sob +
673 (num_sync_stream_queues * HL_RSVD_SOBS);
674 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
675 prop->sync_stream_first_mon +
676 (num_sync_stream_queues * HL_RSVD_MONS);
677
678 prop->first_available_user_interrupt = USHRT_MAX;
679 prop->tpc_interrupt_id = USHRT_MAX;
680
681 /* single msi */
682 prop->eq_interrupt_id = 0;
683
684 for (i = 0 ; i < HL_MAX_DCORES ; i++)
685 prop->first_available_cq[i] = USHRT_MAX;
686
687 prop->fw_cpu_boot_dev_sts0_valid = false;
688 prop->fw_cpu_boot_dev_sts1_valid = false;
689 prop->hard_reset_done_by_fw = false;
690 prop->gic_interrupts_enable = true;
691
692 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
693
694 prop->clk_pll_index = HL_GAUDI_MME_PLL;
695 prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
696
697 prop->use_get_power_for_reset_history = true;
698
699 prop->configurable_stop_on_err = true;
700
701 prop->set_max_power_on_device_init = true;
702
703 prop->dma_mask = 48;
704
705 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
706
707 return 0;
708 }
709
gaudi_pci_bars_map(struct hl_device * hdev)710 static int gaudi_pci_bars_map(struct hl_device *hdev)
711 {
712 static const char * const name[] = {"SRAM", "CFG", "HBM"};
713 bool is_wc[3] = {false, false, true};
714 int rc;
715
716 rc = hl_pci_bars_map(hdev, name, is_wc);
717 if (rc)
718 return rc;
719
720 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
721 (CFG_BASE - SPI_FLASH_BASE_ADDR);
722
723 return 0;
724 }
725
gaudi_set_hbm_bar_base(struct hl_device * hdev,u64 addr)726 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
727 {
728 struct gaudi_device *gaudi = hdev->asic_specific;
729 struct hl_inbound_pci_region pci_region;
730 u64 old_addr = addr;
731 int rc;
732
733 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
734 return old_addr;
735
736 if (hdev->asic_prop.iatu_done_by_fw)
737 return U64_MAX;
738
739 /* Inbound Region 2 - Bar 4 - Point to HBM */
740 pci_region.mode = PCI_BAR_MATCH_MODE;
741 pci_region.bar = HBM_BAR_ID;
742 pci_region.addr = addr;
743 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
744 if (rc)
745 return U64_MAX;
746
747 if (gaudi) {
748 old_addr = gaudi->hbm_bar_cur_addr;
749 gaudi->hbm_bar_cur_addr = addr;
750 }
751
752 return old_addr;
753 }
754
gaudi_init_iatu(struct hl_device * hdev)755 static int gaudi_init_iatu(struct hl_device *hdev)
756 {
757 struct hl_inbound_pci_region inbound_region;
758 struct hl_outbound_pci_region outbound_region;
759 int rc;
760
761 if (hdev->asic_prop.iatu_done_by_fw)
762 return 0;
763
764 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
765 inbound_region.mode = PCI_BAR_MATCH_MODE;
766 inbound_region.bar = SRAM_BAR_ID;
767 inbound_region.addr = SRAM_BASE_ADDR;
768 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
769 if (rc)
770 goto done;
771
772 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
773 inbound_region.mode = PCI_BAR_MATCH_MODE;
774 inbound_region.bar = CFG_BAR_ID;
775 inbound_region.addr = SPI_FLASH_BASE_ADDR;
776 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
777 if (rc)
778 goto done;
779
780 /* Inbound Region 2 - Bar 4 - Point to HBM */
781 inbound_region.mode = PCI_BAR_MATCH_MODE;
782 inbound_region.bar = HBM_BAR_ID;
783 inbound_region.addr = DRAM_PHYS_BASE;
784 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
785 if (rc)
786 goto done;
787
788 /* Outbound Region 0 - Point to Host */
789 outbound_region.addr = HOST_PHYS_BASE;
790 outbound_region.size = HOST_PHYS_SIZE;
791 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
792
793 done:
794 return rc;
795 }
796
gaudi_get_hw_state(struct hl_device * hdev)797 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
798 {
799 return RREG32(mmHW_STATE);
800 }
801
gaudi_early_init(struct hl_device * hdev)802 static int gaudi_early_init(struct hl_device *hdev)
803 {
804 struct asic_fixed_properties *prop = &hdev->asic_prop;
805 struct pci_dev *pdev = hdev->pdev;
806 resource_size_t pci_bar_size;
807 u32 fw_boot_status;
808 int rc;
809
810 rc = gaudi_set_fixed_properties(hdev);
811 if (rc) {
812 dev_err(hdev->dev, "Failed setting fixed properties\n");
813 return rc;
814 }
815
816 /* Check BAR sizes */
817 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);
818
819 if (pci_bar_size != SRAM_BAR_SIZE) {
820 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
821 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
822 rc = -ENODEV;
823 goto free_queue_props;
824 }
825
826 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);
827
828 if (pci_bar_size != CFG_BAR_SIZE) {
829 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
830 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
831 rc = -ENODEV;
832 goto free_queue_props;
833 }
834
835 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
836 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
837
838 /* If FW security is enabled at this point it means no access to ELBI */
839 if (hdev->asic_prop.fw_security_enabled) {
840 hdev->asic_prop.iatu_done_by_fw = true;
841
842 /*
843 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
844 * decision can only be taken based on PCI ID security.
845 */
846 hdev->asic_prop.gic_interrupts_enable = false;
847 goto pci_init;
848 }
849
850 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
851 &fw_boot_status);
852 if (rc)
853 goto free_queue_props;
854
855 /* Check whether FW is configuring iATU */
856 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
857 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
858 hdev->asic_prop.iatu_done_by_fw = true;
859
860 pci_init:
861 rc = hl_pci_init(hdev);
862 if (rc)
863 goto free_queue_props;
864
865 /* Before continuing in the initialization, we need to read the preboot
866 * version to determine whether we run with a security-enabled firmware
867 */
868 rc = hl_fw_read_preboot_status(hdev);
869 if (rc) {
870 if (hdev->reset_on_preboot_fail)
871 /* we are already on failure flow, so don't check if hw_fini fails. */
872 hdev->asic_funcs->hw_fini(hdev, true, false);
873 goto pci_fini;
874 }
875
876 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
877 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
878 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
879 if (rc) {
880 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
881 goto pci_fini;
882 }
883 }
884
885 return 0;
886
887 pci_fini:
888 hl_pci_fini(hdev);
889 free_queue_props:
890 kfree(hdev->asic_prop.hw_queues_props);
891 return rc;
892 }
893
gaudi_early_fini(struct hl_device * hdev)894 static int gaudi_early_fini(struct hl_device *hdev)
895 {
896 kfree(hdev->asic_prop.hw_queues_props);
897 hl_pci_fini(hdev);
898
899 return 0;
900 }
901
902 /**
903 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
904 *
905 * @hdev: pointer to hl_device structure
906 *
907 */
gaudi_fetch_psoc_frequency(struct hl_device * hdev)908 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
909 {
910 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
911 struct asic_fixed_properties *prop = &hdev->asic_prop;
912 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
913 int rc;
914
915 if ((hdev->fw_components & FW_TYPE_LINUX) &&
916 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
917 struct gaudi_device *gaudi = hdev->asic_specific;
918
919 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
920 return 0;
921
922 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);
923
924 if (rc)
925 return rc;
926
927 freq = pll_freq_arr[2];
928 } else {
929 /* Backward compatibility */
930 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
931 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
932 nr = RREG32(mmPSOC_CPU_PLL_NR);
933 nf = RREG32(mmPSOC_CPU_PLL_NF);
934 od = RREG32(mmPSOC_CPU_PLL_OD);
935
936 if (div_sel == DIV_SEL_REF_CLK ||
937 div_sel == DIV_SEL_DIVIDED_REF) {
938 if (div_sel == DIV_SEL_REF_CLK)
939 freq = PLL_REF_CLK;
940 else
941 freq = PLL_REF_CLK / (div_fctr + 1);
942 } else if (div_sel == DIV_SEL_PLL_CLK ||
943 div_sel == DIV_SEL_DIVIDED_PLL) {
944 pll_clk = PLL_REF_CLK * (nf + 1) /
945 ((nr + 1) * (od + 1));
946 if (div_sel == DIV_SEL_PLL_CLK)
947 freq = pll_clk;
948 else
949 freq = pll_clk / (div_fctr + 1);
950 } else {
951 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
952 freq = 0;
953 }
954 }
955
956 prop->psoc_timestamp_frequency = freq;
957 prop->psoc_pci_pll_nr = nr;
958 prop->psoc_pci_pll_nf = nf;
959 prop->psoc_pci_pll_od = od;
960 prop->psoc_pci_pll_div_factor = div_fctr;
961
962 return 0;
963 }
964
_gaudi_init_tpc_mem(struct hl_device * hdev,dma_addr_t tpc_kernel_src_addr,u32 tpc_kernel_size)965 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
966 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
967 {
968 struct asic_fixed_properties *prop = &hdev->asic_prop;
969 struct packet_lin_dma *init_tpc_mem_pkt;
970 struct hl_cs_job *job;
971 struct hl_cb *cb;
972 u64 dst_addr;
973 u32 cb_size, ctl;
974 u8 tpc_id;
975 int rc;
976
977 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
978 if (!cb)
979 return -EFAULT;
980
981 init_tpc_mem_pkt = cb->kernel_address;
982 cb_size = sizeof(*init_tpc_mem_pkt);
983 memset(init_tpc_mem_pkt, 0, cb_size);
984
985 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
986
987 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
988 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
989 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
990 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
991
992 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
993
994 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
995
996 /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
997 dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
998 round_up(prop->sram_user_base_address, SZ_8K));
999 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
1000
1001 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
1002 if (!job) {
1003 dev_err(hdev->dev, "Failed to allocate a new job\n");
1004 rc = -ENOMEM;
1005 goto release_cb;
1006 }
1007
1008 job->id = 0;
1009 job->user_cb = cb;
1010 atomic_inc(&job->user_cb->cs_cnt);
1011 job->user_cb_size = cb_size;
1012 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
1013 job->patched_cb = job->user_cb;
1014 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
1015
1016 hl_debugfs_add_job(hdev, job);
1017
1018 rc = gaudi_send_job_on_qman0(hdev, job);
1019
1020 if (rc)
1021 goto free_job;
1022
1023 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
1024 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
1025 if (rc)
1026 break;
1027 }
1028
1029 free_job:
1030 hl_userptr_delete_list(hdev, &job->userptr_list);
1031 hl_debugfs_remove_job(hdev, job);
1032 kfree(job);
1033 atomic_dec(&cb->cs_cnt);
1034
1035 release_cb:
1036 hl_cb_put(cb);
1037 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1038
1039 return rc;
1040 }
1041
1042 /*
1043 * gaudi_init_tpc_mem() - Initialize TPC memories.
1044 * @hdev: Pointer to hl_device structure.
1045 *
1046 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1047 *
1048 * Return: 0 for success, negative value for error.
1049 */
gaudi_init_tpc_mem(struct hl_device * hdev)1050 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1051 {
1052 const struct firmware *fw;
1053 size_t fw_size;
1054 void *cpu_addr;
1055 dma_addr_t dma_handle;
1056 int rc, count = 5;
1057
1058 again:
1059 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1060 if (rc == -EINTR && count-- > 0) {
1061 msleep(50);
1062 goto again;
1063 }
1064
1065 if (rc) {
1066 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1067 GAUDI_TPC_FW_FILE);
1068 goto out;
1069 }
1070
1071 fw_size = fw->size;
1072 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1073 if (!cpu_addr) {
1074 dev_err(hdev->dev,
1075 "Failed to allocate %zu of dma memory for TPC kernel\n",
1076 fw_size);
1077 rc = -ENOMEM;
1078 goto out;
1079 }
1080
1081 memcpy(cpu_addr, fw->data, fw_size);
1082
1083 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1084
1085 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1086
1087 out:
1088 release_firmware(fw);
1089 return rc;
1090 }
1091
gaudi_collective_map_sobs(struct hl_device * hdev,u32 stream)1092 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
1093 {
1094 struct gaudi_device *gaudi = hdev->asic_specific;
1095 struct gaudi_collective_properties *prop = &gaudi->collective_props;
1096 struct hl_hw_queue *q;
1097 u32 i, sob_id, sob_group_id, queue_id;
1098
1099 /* Iterate through SOB groups and assign a SOB for each slave queue */
1100 sob_group_id =
1101 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
1102 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
1103
1104 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1105 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
1106 q = &hdev->kernel_queues[queue_id + (4 * i)];
1107 q->sync_stream_prop.collective_sob_id = sob_id + i;
1108 }
1109
1110 /* Both DMA5 and TPC7 use the same resources since only a single
1111 * engine need to participate in the reduction process
1112 */
1113 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1114 q = &hdev->kernel_queues[queue_id];
1115 q->sync_stream_prop.collective_sob_id =
1116 sob_id + NIC_NUMBER_OF_ENGINES;
1117
1118 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1119 q = &hdev->kernel_queues[queue_id];
1120 q->sync_stream_prop.collective_sob_id =
1121 sob_id + NIC_NUMBER_OF_ENGINES;
1122 }
1123
gaudi_sob_group_hw_reset(struct kref * ref)1124 static void gaudi_sob_group_hw_reset(struct kref *ref)
1125 {
1126 struct gaudi_hw_sob_group *hw_sob_group =
1127 container_of(ref, struct gaudi_hw_sob_group, kref);
1128 struct hl_device *hdev = hw_sob_group->hdev;
1129 int i;
1130
1131 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1132 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1133 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1134
1135 kref_init(&hw_sob_group->kref);
1136 }
1137
gaudi_sob_group_reset_error(struct kref * ref)1138 static void gaudi_sob_group_reset_error(struct kref *ref)
1139 {
1140 struct gaudi_hw_sob_group *hw_sob_group =
1141 container_of(ref, struct gaudi_hw_sob_group, kref);
1142 struct hl_device *hdev = hw_sob_group->hdev;
1143
1144 dev_crit(hdev->dev,
1145 "SOB release shouldn't be called here, base_sob_id: %d\n",
1146 hw_sob_group->base_sob_id);
1147 }
1148
gaudi_collective_mstr_sob_mask_set(struct gaudi_device * gaudi)1149 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
1150 {
1151 struct gaudi_collective_properties *prop;
1152 int i;
1153
1154 prop = &gaudi->collective_props;
1155
1156 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));
1157
1158 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
1159 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
1160 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1161 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1162 /* Set collective engine bit */
1163 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
1164 BIT(i % HL_MAX_SOBS_PER_MONITOR);
1165 }
1166
gaudi_collective_init(struct hl_device * hdev)1167 static int gaudi_collective_init(struct hl_device *hdev)
1168 {
1169 u32 i, sob_id, reserved_sobs_per_group;
1170 struct gaudi_collective_properties *prop;
1171 struct gaudi_device *gaudi;
1172
1173 gaudi = hdev->asic_specific;
1174 prop = &gaudi->collective_props;
1175 sob_id = hdev->asic_prop.collective_first_sob;
1176
1177 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1178 reserved_sobs_per_group =
1179 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1180
1181 /* Init SOB groups */
1182 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1183 prop->hw_sob_group[i].hdev = hdev;
1184 prop->hw_sob_group[i].base_sob_id = sob_id;
1185 sob_id += reserved_sobs_per_group;
1186 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1187 }
1188
1189 for (i = 0 ; i < QMAN_STREAMS; i++) {
1190 prop->next_sob_group_val[i] = 1;
1191 prop->curr_sob_group_idx[i] = 0;
1192 gaudi_collective_map_sobs(hdev, i);
1193 }
1194
1195 gaudi_collective_mstr_sob_mask_set(gaudi);
1196
1197 return 0;
1198 }
1199
gaudi_reset_sob_group(struct hl_device * hdev,u16 sob_group)1200 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
1201 {
1202 struct gaudi_device *gaudi = hdev->asic_specific;
1203 struct gaudi_collective_properties *cprop = &gaudi->collective_props;
1204
1205 kref_put(&cprop->hw_sob_group[sob_group].kref,
1206 gaudi_sob_group_hw_reset);
1207 }
1208
gaudi_collective_master_init_job(struct hl_device * hdev,struct hl_cs_job * job,u32 stream,u32 sob_group_offset)1209 static void gaudi_collective_master_init_job(struct hl_device *hdev,
1210 struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
1211 {
1212 u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
1213 struct gaudi_collective_properties *cprop;
1214 struct hl_gen_wait_properties wait_prop;
1215 struct hl_sync_stream_properties *prop;
1216 struct gaudi_device *gaudi;
1217
1218 gaudi = hdev->asic_specific;
1219 cprop = &gaudi->collective_props;
1220 queue_id = job->hw_queue_id;
1221 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1222
1223 master_sob_base =
1224 cprop->hw_sob_group[sob_group_offset].base_sob_id;
1225 master_monitor = prop->collective_mstr_mon_id[0];
1226
1227 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
1228
1229 dev_dbg(hdev->dev,
1230 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1231 master_sob_base, cprop->mstr_sob_mask[0],
1232 cprop->next_sob_group_val[stream],
1233 master_monitor, queue_id);
1234
1235 wait_prop.data = (void *) job->patched_cb;
1236 wait_prop.sob_base = master_sob_base;
1237 wait_prop.sob_mask = cprop->mstr_sob_mask[0];
1238 wait_prop.sob_val = cprop->next_sob_group_val[stream];
1239 wait_prop.mon_id = master_monitor;
1240 wait_prop.q_idx = queue_id;
1241 wait_prop.size = cb_size;
1242 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1243
1244 master_sob_base += HL_MAX_SOBS_PER_MONITOR;
1245 master_monitor = prop->collective_mstr_mon_id[1];
1246
1247 dev_dbg(hdev->dev,
1248 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
1249 master_sob_base, cprop->mstr_sob_mask[1],
1250 cprop->next_sob_group_val[stream],
1251 master_monitor, queue_id);
1252
1253 wait_prop.sob_base = master_sob_base;
1254 wait_prop.sob_mask = cprop->mstr_sob_mask[1];
1255 wait_prop.mon_id = master_monitor;
1256 wait_prop.size = cb_size;
1257 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1258 }
1259
gaudi_collective_slave_init_job(struct hl_device * hdev,struct hl_cs_job * job,struct hl_cs_compl * cs_cmpl)1260 static void gaudi_collective_slave_init_job(struct hl_device *hdev,
1261 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
1262 {
1263 struct hl_gen_wait_properties wait_prop;
1264 struct hl_sync_stream_properties *prop;
1265 u32 queue_id, cb_size = 0;
1266
1267 queue_id = job->hw_queue_id;
1268 prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
1269
1270 if (job->cs->encaps_signals) {
1271 /* use the encaps signal handle store earlier in the flow
1272 * and set the SOB information from the encaps
1273 * signals handle
1274 */
1275 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
1276 cs_cmpl);
1277
1278 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
1279 job->cs->sequence,
1280 cs_cmpl->hw_sob->sob_id,
1281 cs_cmpl->sob_val);
1282 }
1283
1284 /* Add to wait CBs using slave monitor */
1285 wait_prop.data = (void *) job->user_cb;
1286 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
1287 wait_prop.sob_mask = 0x1;
1288 wait_prop.sob_val = cs_cmpl->sob_val;
1289 wait_prop.mon_id = prop->collective_slave_mon_id;
1290 wait_prop.q_idx = queue_id;
1291 wait_prop.size = cb_size;
1292
1293 dev_dbg(hdev->dev,
1294 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
1295 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
1296 prop->collective_slave_mon_id, queue_id);
1297
1298 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
1299
1300 dev_dbg(hdev->dev,
1301 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
1302 prop->collective_sob_id, queue_id);
1303
1304 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
1305 prop->collective_sob_id, cb_size, false);
1306 }
1307
gaudi_collective_wait_init_cs(struct hl_cs * cs)1308 static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
1309 {
1310 struct hl_cs_compl *signal_cs_cmpl =
1311 container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
1312 struct hl_cs_compl *cs_cmpl =
1313 container_of(cs->fence, struct hl_cs_compl, base_fence);
1314 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
1315 struct gaudi_collective_properties *cprop;
1316 u32 stream, queue_id, sob_group_offset;
1317 struct gaudi_device *gaudi;
1318 struct hl_device *hdev;
1319 struct hl_cs_job *job;
1320 struct hl_ctx *ctx;
1321
1322 ctx = cs->ctx;
1323 hdev = ctx->hdev;
1324 gaudi = hdev->asic_specific;
1325 cprop = &gaudi->collective_props;
1326
1327 if (cs->encaps_signals) {
1328 cs_cmpl->hw_sob = handle->hw_sob;
1329 /* at this checkpoint we only need the hw_sob pointer
1330 * for the completion check before start going over the jobs
1331 * of the master/slaves, the sob_value will be taken later on
1332 * in gaudi_collective_slave_init_job depends on each
1333 * job wait offset value.
1334 */
1335 cs_cmpl->sob_val = 0;
1336 } else {
1337 /* copy the SOB id and value of the signal CS */
1338 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
1339 cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
1340 }
1341
1342 /* check again if the signal cs already completed.
1343 * if yes then don't send any wait cs since the hw_sob
1344 * could be in reset already. if signal is not completed
1345 * then get refcount to hw_sob to prevent resetting the sob
1346 * while wait cs is not submitted.
1347 * note that this check is protected by two locks,
1348 * hw queue lock and completion object lock,
1349 * and the same completion object lock also protects
1350 * the hw_sob reset handler function.
1351 * The hw_queue lock prevent out of sync of hw_sob
1352 * refcount value, changed by signal/wait flows.
1353 */
1354 spin_lock(&signal_cs_cmpl->lock);
1355
1356 if (completion_done(&cs->signal_fence->completion)) {
1357 spin_unlock(&signal_cs_cmpl->lock);
1358 return -EINVAL;
1359 }
1360 /* Increment kref since all slave queues are now waiting on it */
1361 kref_get(&cs_cmpl->hw_sob->kref);
1362
1363 spin_unlock(&signal_cs_cmpl->lock);
1364
1365 /* Calculate the stream from collective master queue (1st job) */
1366 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
1367 stream = job->hw_queue_id % 4;
1368 sob_group_offset =
1369 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
1370
1371 list_for_each_entry(job, &cs->job_list, cs_node) {
1372 queue_id = job->hw_queue_id;
1373
1374 if (hdev->kernel_queues[queue_id].collective_mode ==
1375 HL_COLLECTIVE_MASTER)
1376 gaudi_collective_master_init_job(hdev, job, stream,
1377 sob_group_offset);
1378 else
1379 gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
1380 }
1381
1382 cs_cmpl->sob_group = sob_group_offset;
1383
1384 /* Handle sob group kref and wraparound */
1385 kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
1386 cprop->next_sob_group_val[stream]++;
1387
1388 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
1389 /*
1390 * Decrement as we reached the max value.
1391 * The release function won't be called here as we've
1392 * just incremented the refcount.
1393 */
1394 kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
1395 gaudi_sob_group_reset_error);
1396 cprop->next_sob_group_val[stream] = 1;
1397 /* only two SOBs are currently in use */
1398 cprop->curr_sob_group_idx[stream] =
1399 (cprop->curr_sob_group_idx[stream] + 1) &
1400 (HL_RSVD_SOBS - 1);
1401
1402 gaudi_collective_map_sobs(hdev, stream);
1403
1404 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
1405 cprop->curr_sob_group_idx[stream], stream);
1406 }
1407
1408 mb();
1409 hl_fence_put(cs->signal_fence);
1410 cs->signal_fence = NULL;
1411
1412 return 0;
1413 }
1414
gaudi_get_patched_cb_extra_size(u32 user_cb_size)1415 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
1416 {
1417 u32 cacheline_end, additional_commands;
1418
1419 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
1420 additional_commands = sizeof(struct packet_msg_prot) * 2;
1421
1422 if (user_cb_size + additional_commands > cacheline_end)
1423 return cacheline_end - user_cb_size + additional_commands;
1424 else
1425 return additional_commands;
1426 }
1427
gaudi_collective_wait_create_job(struct hl_device * hdev,struct hl_ctx * ctx,struct hl_cs * cs,enum hl_collective_mode mode,u32 queue_id,u32 wait_queue_id,u32 encaps_signal_offset)1428 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
1429 struct hl_ctx *ctx, struct hl_cs *cs,
1430 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
1431 u32 encaps_signal_offset)
1432 {
1433 struct hw_queue_properties *hw_queue_prop;
1434 struct hl_cs_counters_atomic *cntr;
1435 struct hl_cs_job *job;
1436 struct hl_cb *cb;
1437 u32 cb_size;
1438 bool patched_cb;
1439
1440 cntr = &hdev->aggregated_cs_counters;
1441
1442 if (mode == HL_COLLECTIVE_MASTER) {
1443 /* CB size of collective master queue contains
1444 * 4 msg short packets for monitor 1 configuration
1445 * 1 fence packet
1446 * 4 msg short packets for monitor 2 configuration
1447 * 1 fence packet
1448 * 2 msg prot packets for completion and MSI
1449 */
1450 cb_size = sizeof(struct packet_msg_short) * 8 +
1451 sizeof(struct packet_fence) * 2 +
1452 sizeof(struct packet_msg_prot) * 2;
1453 patched_cb = true;
1454 } else {
1455 /* CB size of collective slave queues contains
1456 * 4 msg short packets for monitor configuration
1457 * 1 fence packet
1458 * 1 additional msg short packet for sob signal
1459 */
1460 cb_size = sizeof(struct packet_msg_short) * 5 +
1461 sizeof(struct packet_fence);
1462 patched_cb = false;
1463 }
1464
1465 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
1466 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
1467 if (!job) {
1468 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1469 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1470 dev_err(hdev->dev, "Failed to allocate a new job\n");
1471 return -ENOMEM;
1472 }
1473
1474 /* Allocate internal mapped CB for non patched CBs */
1475 cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
1476 if (!cb) {
1477 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
1478 atomic64_inc(&cntr->out_of_mem_drop_cnt);
1479 kfree(job);
1480 return -EFAULT;
1481 }
1482
1483 job->id = 0;
1484 job->cs = cs;
1485 job->user_cb = cb;
1486 atomic_inc(&job->user_cb->cs_cnt);
1487 job->user_cb_size = cb_size;
1488 job->hw_queue_id = queue_id;
1489
1490 /* since its guaranteed to have only one chunk in the collective wait
1491 * cs, we can use this chunk to set the encapsulated signal offset
1492 * in the jobs.
1493 */
1494 if (cs->encaps_signals)
1495 job->encaps_sig_wait_offset = encaps_signal_offset;
1496
1497 /*
1498 * No need in parsing, user CB is the patched CB.
1499 * We call hl_cb_destroy() out of two reasons - we don't need
1500 * the CB in the CB idr anymore and to decrement its refcount as
1501 * it was incremented inside hl_cb_kernel_create().
1502 */
1503 if (patched_cb)
1504 job->patched_cb = job->user_cb;
1505 else
1506 job->patched_cb = NULL;
1507
1508 job->job_cb_size = job->user_cb_size;
1509 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
1510
1511 /* increment refcount as for external queues we get completion */
1512 if (hw_queue_prop->type == QUEUE_TYPE_EXT)
1513 cs_get(cs);
1514
1515 cs->jobs_in_queue_cnt[job->hw_queue_id]++;
1516
1517 list_add_tail(&job->cs_node, &cs->job_list);
1518
1519 hl_debugfs_add_job(hdev, job);
1520
1521 return 0;
1522 }
1523
gaudi_collective_wait_create_jobs(struct hl_device * hdev,struct hl_ctx * ctx,struct hl_cs * cs,u32 wait_queue_id,u32 collective_engine_id,u32 encaps_signal_offset)1524 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
1525 struct hl_ctx *ctx, struct hl_cs *cs,
1526 u32 wait_queue_id, u32 collective_engine_id,
1527 u32 encaps_signal_offset)
1528 {
1529 struct gaudi_device *gaudi = hdev->asic_specific;
1530 struct hw_queue_properties *hw_queue_prop;
1531 u32 queue_id, collective_queue, num_jobs;
1532 u32 stream, nic_queue, nic_idx = 0;
1533 bool skip;
1534 int i, rc = 0;
1535
1536 /* Verify wait queue id is configured as master */
1537 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
1538 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
1539 dev_err(hdev->dev,
1540 "Queue %d is not configured as collective master\n",
1541 wait_queue_id);
1542 return -EINVAL;
1543 }
1544
1545 /* Verify engine id is supported */
1546 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
1547 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
1548 dev_err(hdev->dev,
1549 "Collective wait does not support engine %u\n",
1550 collective_engine_id);
1551 return -EINVAL;
1552 }
1553
1554 stream = wait_queue_id % 4;
1555
1556 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
1557 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
1558 else
1559 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;
1560
1561 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
1562 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;
1563
1564 /* First job goes to the collective master queue, it will wait for
1565 * the collective slave queues to finish execution.
1566 * The synchronization is done using two monitors:
1567 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
1568 * reduction engine (DMA5/TPC7).
1569 *
1570 * Rest of the jobs goes to the collective slave queues which will
1571 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
1572 */
1573 for (i = 0 ; i < num_jobs ; i++) {
1574 if (i == 0) {
1575 queue_id = wait_queue_id;
1576 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1577 HL_COLLECTIVE_MASTER, queue_id,
1578 wait_queue_id, encaps_signal_offset);
1579 } else {
1580 if (nic_idx < NIC_NUMBER_OF_ENGINES) {
1581 if (gaudi->hw_cap_initialized &
1582 BIT(HW_CAP_NIC_SHIFT + nic_idx))
1583 skip = false;
1584 else
1585 skip = true;
1586
1587 queue_id = nic_queue;
1588 nic_queue += 4;
1589 nic_idx++;
1590
1591 if (skip)
1592 continue;
1593 } else {
1594 queue_id = collective_queue;
1595 }
1596
1597 rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
1598 HL_COLLECTIVE_SLAVE, queue_id,
1599 wait_queue_id, encaps_signal_offset);
1600 }
1601
1602 if (rc)
1603 return rc;
1604 }
1605
1606 return rc;
1607 }
1608
gaudi_late_init(struct hl_device * hdev)1609 static int gaudi_late_init(struct hl_device *hdev)
1610 {
1611 struct gaudi_device *gaudi = hdev->asic_specific;
1612 int rc;
1613
1614 rc = gaudi->cpucp_info_get(hdev);
1615 if (rc) {
1616 dev_err(hdev->dev, "Failed to get cpucp info\n");
1617 return rc;
1618 }
1619
1620 if ((hdev->card_type == cpucp_card_type_pci) &&
1621 (hdev->nic_ports_mask & 0x3)) {
1622 dev_info(hdev->dev,
1623 "PCI card detected, only 8 ports are enabled\n");
1624 hdev->nic_ports_mask &= ~0x3;
1625
1626 /* Stop and disable unused NIC QMANs */
1627 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1628 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1629 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1630
1631 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
1632 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
1633 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
1634
1635 WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
1636 WREG32(mmNIC0_QM1_GLBL_CFG0, 0);
1637
1638 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
1639 }
1640
1641 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
1642 if (rc)
1643 return rc;
1644
1645 /* Scrub both SRAM and DRAM */
1646 rc = hdev->asic_funcs->scrub_device_mem(hdev);
1647 if (rc)
1648 goto disable_pci_access;
1649
1650 rc = gaudi_fetch_psoc_frequency(hdev);
1651 if (rc) {
1652 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
1653 goto disable_pci_access;
1654 }
1655
1656 rc = gaudi_mmu_clear_pgt_range(hdev);
1657 if (rc) {
1658 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
1659 goto disable_pci_access;
1660 }
1661
1662 rc = gaudi_init_tpc_mem(hdev);
1663 if (rc) {
1664 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
1665 goto disable_pci_access;
1666 }
1667
1668 rc = gaudi_collective_init(hdev);
1669 if (rc) {
1670 dev_err(hdev->dev, "Failed to init collective\n");
1671 goto disable_pci_access;
1672 }
1673
1674 /* We only support a single ASID for the user, so for the sake of optimization, just
1675 * initialize the ASID one time during device initialization with the fixed value of 1
1676 */
1677 gaudi_mmu_prepare(hdev, 1);
1678
1679 hl_fw_set_pll_profile(hdev);
1680
1681 return 0;
1682
1683 disable_pci_access:
1684 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
1685
1686 return rc;
1687 }
1688
gaudi_late_fini(struct hl_device * hdev)1689 static void gaudi_late_fini(struct hl_device *hdev)
1690 {
1691 hl_hwmon_release_resources(hdev);
1692 }
1693
gaudi_alloc_cpu_accessible_dma_mem(struct hl_device * hdev)1694 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1695 {
1696 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1697 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1698 int i, j, rc = 0;
1699
1700 /*
1701 * The device CPU works with 40-bits addresses, while bit 39 must be set
1702 * to '1' when accessing the host.
1703 * Bits 49:39 of the full host address are saved for a later
1704 * configuration of the HW to perform extension to 50 bits.
1705 * Because there is a single HW register that holds the extension bits,
1706 * these bits must be identical in all allocated range.
1707 */
1708
1709 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1710 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
1711 &dma_addr_arr[i],
1712 GFP_KERNEL | __GFP_ZERO);
1713 if (!virt_addr_arr[i]) {
1714 rc = -ENOMEM;
1715 goto free_dma_mem_arr;
1716 }
1717
1718 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1719 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1720 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1721 break;
1722 }
1723
1724 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1725 dev_err(hdev->dev,
1726 "MSB of CPU accessible DMA memory are not identical in all range\n");
1727 rc = -EFAULT;
1728 goto free_dma_mem_arr;
1729 }
1730
1731 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1732 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1733 hdev->cpu_pci_msb_addr =
1734 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1735
1736 if (!hdev->asic_prop.fw_security_enabled)
1737 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1738
1739 free_dma_mem_arr:
1740 for (j = 0 ; j < i ; j++)
1741 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
1742 dma_addr_arr[j]);
1743
1744 return rc;
1745 }
1746
gaudi_free_internal_qmans_pq_mem(struct hl_device * hdev)1747 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1748 {
1749 struct gaudi_device *gaudi = hdev->asic_specific;
1750 struct gaudi_internal_qman_info *q;
1751 u32 i;
1752
1753 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1754 q = &gaudi->internal_qmans[i];
1755 if (!q->pq_kernel_addr)
1756 continue;
1757 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1758 }
1759 }
1760
gaudi_alloc_internal_qmans_pq_mem(struct hl_device * hdev)1761 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1762 {
1763 struct gaudi_device *gaudi = hdev->asic_specific;
1764 struct gaudi_internal_qman_info *q;
1765 int rc, i;
1766
1767 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1768 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1769 continue;
1770
1771 q = &gaudi->internal_qmans[i];
1772
1773 switch (i) {
1774 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1775 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1776 break;
1777 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1778 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1779 break;
1780 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1781 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1782 break;
1783 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1784 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1785 break;
1786 default:
1787 dev_err(hdev->dev, "Bad internal queue index %d", i);
1788 rc = -EINVAL;
1789 goto free_internal_qmans_pq_mem;
1790 }
1791
1792 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1793 GFP_KERNEL | __GFP_ZERO);
1794 if (!q->pq_kernel_addr) {
1795 rc = -ENOMEM;
1796 goto free_internal_qmans_pq_mem;
1797 }
1798 }
1799
1800 return 0;
1801
1802 free_internal_qmans_pq_mem:
1803 gaudi_free_internal_qmans_pq_mem(hdev);
1804 return rc;
1805 }
1806
gaudi_set_pci_memory_regions(struct hl_device * hdev)1807 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1808 {
1809 struct asic_fixed_properties *prop = &hdev->asic_prop;
1810 struct pci_mem_region *region;
1811
1812 /* CFG */
1813 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1814 region->region_base = CFG_BASE;
1815 region->region_size = CFG_SIZE;
1816 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1817 region->bar_size = CFG_BAR_SIZE;
1818 region->bar_id = CFG_BAR_ID;
1819 region->used = 1;
1820
1821 /* SRAM */
1822 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1823 region->region_base = SRAM_BASE_ADDR;
1824 region->region_size = SRAM_SIZE;
1825 region->offset_in_bar = 0;
1826 region->bar_size = SRAM_BAR_SIZE;
1827 region->bar_id = SRAM_BAR_ID;
1828 region->used = 1;
1829
1830 /* DRAM */
1831 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1832 region->region_base = DRAM_PHYS_BASE;
1833 region->region_size = hdev->asic_prop.dram_size;
1834 region->offset_in_bar = 0;
1835 region->bar_size = prop->dram_pci_bar_size;
1836 region->bar_id = HBM_BAR_ID;
1837 region->used = 1;
1838
1839 /* SP SRAM */
1840 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1841 region->region_base = PSOC_SCRATCHPAD_ADDR;
1842 region->region_size = PSOC_SCRATCHPAD_SIZE;
1843 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1844 region->bar_size = CFG_BAR_SIZE;
1845 region->bar_id = CFG_BAR_ID;
1846 region->used = 1;
1847 }
1848
gaudi_sw_init(struct hl_device * hdev)1849 static int gaudi_sw_init(struct hl_device *hdev)
1850 {
1851 struct gaudi_device *gaudi;
1852 u32 i, event_id = 0;
1853 int rc;
1854
1855 /* Allocate device structure */
1856 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1857 if (!gaudi)
1858 return -ENOMEM;
1859
1860 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
1861 if (gaudi_irq_map_table[i].valid) {
1862 if (event_id == GAUDI_EVENT_SIZE) {
1863 dev_err(hdev->dev,
1864 "Event array exceeds the limit of %u events\n",
1865 GAUDI_EVENT_SIZE);
1866 rc = -EINVAL;
1867 goto free_gaudi_device;
1868 }
1869
1870 gaudi->events[event_id++] =
1871 gaudi_irq_map_table[i].fc_id;
1872 }
1873 }
1874
1875 gaudi->cpucp_info_get = gaudi_cpucp_info_get;
1876
1877 hdev->asic_specific = gaudi;
1878
1879 /* Create DMA pool for small allocations */
1880 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1881 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1882 if (!hdev->dma_pool) {
1883 dev_err(hdev->dev, "failed to create DMA pool\n");
1884 rc = -ENOMEM;
1885 goto free_gaudi_device;
1886 }
1887
1888 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1889 if (rc)
1890 goto free_dma_pool;
1891
1892 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1893 if (!hdev->cpu_accessible_dma_pool) {
1894 dev_err(hdev->dev,
1895 "Failed to create CPU accessible DMA pool\n");
1896 rc = -ENOMEM;
1897 goto free_cpu_dma_mem;
1898 }
1899
1900 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1901 (uintptr_t) hdev->cpu_accessible_dma_mem,
1902 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1903 if (rc) {
1904 dev_err(hdev->dev,
1905 "Failed to add memory to CPU accessible DMA pool\n");
1906 rc = -EFAULT;
1907 goto free_cpu_accessible_dma_pool;
1908 }
1909
1910 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1911 if (rc)
1912 goto free_cpu_accessible_dma_pool;
1913
1914 spin_lock_init(&gaudi->hw_queues_lock);
1915
1916 hdev->supports_sync_stream = true;
1917 hdev->supports_coresight = true;
1918 hdev->supports_staged_submission = true;
1919 hdev->supports_wait_for_multi_cs = true;
1920
1921 hdev->asic_funcs->set_pci_memory_regions(hdev);
1922 hdev->stream_master_qid_arr =
1923 hdev->asic_funcs->get_stream_master_qid_arr();
1924 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
1925
1926 return 0;
1927
1928 free_cpu_accessible_dma_pool:
1929 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1930 free_cpu_dma_mem:
1931 if (!hdev->asic_prop.fw_security_enabled)
1932 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1933 hdev->cpu_pci_msb_addr);
1934 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1935 hdev->cpu_accessible_dma_address);
1936 free_dma_pool:
1937 dma_pool_destroy(hdev->dma_pool);
1938 free_gaudi_device:
1939 kfree(gaudi);
1940 return rc;
1941 }
1942
gaudi_sw_fini(struct hl_device * hdev)1943 static int gaudi_sw_fini(struct hl_device *hdev)
1944 {
1945 struct gaudi_device *gaudi = hdev->asic_specific;
1946
1947 gaudi_free_internal_qmans_pq_mem(hdev);
1948
1949 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1950
1951 if (!hdev->asic_prop.fw_security_enabled)
1952 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1953 hdev->cpu_pci_msb_addr);
1954
1955 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
1956 hdev->cpu_accessible_dma_address);
1957
1958 dma_pool_destroy(hdev->dma_pool);
1959
1960 kfree(gaudi);
1961
1962 return 0;
1963 }
1964
gaudi_irq_handler_single(int irq,void * arg)1965 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1966 {
1967 struct hl_device *hdev = arg;
1968 int i;
1969
1970 if (hdev->disabled)
1971 return IRQ_HANDLED;
1972
1973 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1974 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1975
1976 hl_irq_handler_eq(irq, &hdev->event_queue);
1977
1978 return IRQ_HANDLED;
1979 }
1980
1981 /*
1982 * For backward compatibility, new MSI interrupts should be set after the
1983 * existing CPU and NIC interrupts.
1984 */
gaudi_pci_irq_vector(struct hl_device * hdev,unsigned int nr,bool cpu_eq)1985 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1986 bool cpu_eq)
1987 {
1988 int msi_vec;
1989
1990 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1991 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1992 GAUDI_EVENT_QUEUE_MSI_IDX);
1993
1994 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1995 (nr + NIC_NUMBER_OF_ENGINES + 1);
1996
1997 return pci_irq_vector(hdev->pdev, msi_vec);
1998 }
1999
gaudi_enable_msi_single(struct hl_device * hdev)2000 static int gaudi_enable_msi_single(struct hl_device *hdev)
2001 {
2002 int rc, irq;
2003
2004 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2005
2006 irq = gaudi_pci_irq_vector(hdev, 0, false);
2007 rc = request_irq(irq, gaudi_irq_handler_single, 0,
2008 "gaudi single msi", hdev);
2009 if (rc)
2010 dev_err(hdev->dev,
2011 "Failed to request single MSI IRQ\n");
2012
2013 return rc;
2014 }
2015
gaudi_enable_msi(struct hl_device * hdev)2016 static int gaudi_enable_msi(struct hl_device *hdev)
2017 {
2018 struct gaudi_device *gaudi = hdev->asic_specific;
2019 int rc;
2020
2021 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2022 return 0;
2023
2024 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2025 if (rc < 0) {
2026 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2027 return rc;
2028 }
2029
2030 rc = gaudi_enable_msi_single(hdev);
2031 if (rc)
2032 goto free_pci_irq_vectors;
2033
2034 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2035
2036 return 0;
2037
2038 free_pci_irq_vectors:
2039 pci_free_irq_vectors(hdev->pdev);
2040 return rc;
2041 }
2042
gaudi_sync_irqs(struct hl_device * hdev)2043 static void gaudi_sync_irqs(struct hl_device *hdev)
2044 {
2045 struct gaudi_device *gaudi = hdev->asic_specific;
2046
2047 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2048 return;
2049
2050 /* Wait for all pending IRQs to be finished */
2051 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2052 }
2053
gaudi_disable_msi(struct hl_device * hdev)2054 static void gaudi_disable_msi(struct hl_device *hdev)
2055 {
2056 struct gaudi_device *gaudi = hdev->asic_specific;
2057
2058 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2059 return;
2060
2061 gaudi_sync_irqs(hdev);
2062 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2063 pci_free_irq_vectors(hdev->pdev);
2064
2065 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2066 }
2067
gaudi_init_scrambler_sram(struct hl_device * hdev)2068 static void gaudi_init_scrambler_sram(struct hl_device *hdev)
2069 {
2070 struct gaudi_device *gaudi = hdev->asic_specific;
2071
2072 if (hdev->asic_prop.fw_security_enabled)
2073 return;
2074
2075 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
2076 CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
2077 return;
2078
2079 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
2080 return;
2081
2082 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2083 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2084 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2085 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2086 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2087 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2088 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2089 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2090 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2091 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2092 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2093 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2094 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2095 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2096 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2097 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2098
2099 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
2100 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2101 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
2102 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2103 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
2104 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2105 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
2106 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2107 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
2108 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2109 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
2110 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2111 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
2112 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2113 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
2114 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
2115
2116 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
2117 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2118 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
2119 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2120 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
2121 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2122 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
2123 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2124 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
2125 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2126 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
2127 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2128 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
2129 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2130 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
2131 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
2132
2133 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
2134 }
2135
gaudi_init_scrambler_hbm(struct hl_device * hdev)2136 static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
2137 {
2138 struct gaudi_device *gaudi = hdev->asic_specific;
2139
2140 if (hdev->asic_prop.fw_security_enabled)
2141 return;
2142
2143 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2144 CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
2145 return;
2146
2147 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
2148 return;
2149
2150 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
2151 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2152 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
2153 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2154 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
2155 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2156 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
2157 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2158 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
2159 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2160 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
2161 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2162 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
2163 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2164 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
2165 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2166
2167 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2169 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
2170 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2171 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
2172 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2173 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
2174 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2175 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
2176 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2177 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
2178 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2179 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
2180 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2181 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
2182 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
2183
2184 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
2185 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2186 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
2187 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2188 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
2189 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2190 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
2191 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2192 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
2193 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2194 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
2195 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2196 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
2197 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2198 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
2199 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
2200
2201 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
2202 }
2203
gaudi_init_e2e(struct hl_device * hdev)2204 static void gaudi_init_e2e(struct hl_device *hdev)
2205 {
2206 if (hdev->asic_prop.fw_security_enabled)
2207 return;
2208
2209 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2210 CPU_BOOT_DEV_STS0_E2E_CRED_EN)
2211 return;
2212
2213 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
2214 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
2215 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
2216 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
2217
2218 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2219 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2220 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2221 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2222
2223 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2224 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2225 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2226 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2227
2228 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2229 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2230 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2231 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2232
2233 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2234 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2235 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2236 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2237
2238 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2239 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2240 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2241 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2242
2243 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2244 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2245 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2246 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2247
2248 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
2249 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
2250 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
2251 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
2252
2253 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
2254 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
2255 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
2256 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
2257
2258 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
2259 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
2260 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
2261 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
2262
2263 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
2264 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
2265 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
2266 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
2267
2268 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
2269 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
2270 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
2271 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
2272
2273 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
2274 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
2275 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
2276 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
2277
2278 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
2279 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
2280 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
2281 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
2282
2283 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
2284 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
2285 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
2286 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
2287
2288 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
2289 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
2290 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
2291 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
2292
2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2294 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2296 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2297
2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2299 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2301 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2302
2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2304 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2306 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2307
2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2309 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2311 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2312
2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2314 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2316 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2317
2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2319 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2321 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2322
2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
2324 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
2326 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
2327
2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
2329 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
2331 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
2332
2333 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
2334 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2335 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
2336 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2337
2338 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
2339 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2340 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
2341 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2342
2343 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
2344 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2345 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
2346 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2347
2348 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
2349 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2350 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
2351 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2352
2353 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
2354 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2355 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
2356 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2357
2358 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
2359 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2360 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
2361 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2362
2363 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
2364 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2365 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
2366 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2367
2368 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
2369 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2370 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
2371 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2372
2373 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
2374 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2375 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
2376 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2377
2378 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
2379 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2380 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
2381 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2382
2383 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
2384 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2385 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
2386 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2387
2388 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
2389 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2390 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
2391 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2392
2393 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
2394 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2395 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
2396 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2397
2398 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
2399 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2400 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
2401 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2402
2403 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
2404 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2405 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
2406 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2407
2408 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
2409 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
2410 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
2411 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
2412
2413 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
2414 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2415 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
2416 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2417
2418 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
2419 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2420 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
2421 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2422
2423 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
2424 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2425 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
2426 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2427
2428 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
2429 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2430 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
2431 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2432
2433 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
2434 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2435 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
2436 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2437
2438 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
2439 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2440 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
2441 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2442
2443 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
2444 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2445 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
2446 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2447
2448 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
2449 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
2450 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
2451 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
2452 }
2453
gaudi_init_hbm_cred(struct hl_device * hdev)2454 static void gaudi_init_hbm_cred(struct hl_device *hdev)
2455 {
2456 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
2457
2458 if (hdev->asic_prop.fw_security_enabled)
2459 return;
2460
2461 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
2462 CPU_BOOT_DEV_STS0_HBM_CRED_EN)
2463 return;
2464
2465 hbm0_wr = 0x33333333;
2466 hbm0_rd = 0x77777777;
2467 hbm1_wr = 0x55555555;
2468 hbm1_rd = 0xDDDDDDDD;
2469
2470 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
2471 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
2472 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
2473 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
2474
2475 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
2476 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
2477 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
2478 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
2479
2480 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
2481 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
2482 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
2483 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
2484
2485 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
2486 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
2487 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
2488 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
2489
2490 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
2491 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2492 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2493 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
2494 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2495 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2496 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
2497 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2498 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2499 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
2500 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2501 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2502
2503 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
2504 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2505 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2506 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2509 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
2510 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2511 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2512 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
2513 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
2514 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
2515 }
2516
gaudi_init_golden_registers(struct hl_device * hdev)2517 static void gaudi_init_golden_registers(struct hl_device *hdev)
2518 {
2519 u32 tpc_offset;
2520 int tpc_id, i;
2521
2522 gaudi_init_e2e(hdev);
2523 gaudi_init_hbm_cred(hdev);
2524
2525 for (tpc_id = 0, tpc_offset = 0;
2526 tpc_id < TPC_NUMBER_OF_ENGINES;
2527 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2528 /* Mask all arithmetic interrupts from TPC */
2529 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
2530 /* Set 16 cache lines */
2531 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2532 ICACHE_FETCH_LINE_NUM, 2);
2533 }
2534
2535 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2536 for (i = 0 ; i < 128 ; i += 8)
2537 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
2538
2539 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2540 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2541 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2542 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2543 }
2544
gaudi_init_pci_dma_qman(struct hl_device * hdev,int dma_id,int qman_id,dma_addr_t qman_pq_addr)2545 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2546 int qman_id, dma_addr_t qman_pq_addr)
2547 {
2548 struct cpu_dyn_regs *dyn_regs =
2549 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2550 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2551 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2552 u32 q_off, dma_qm_offset;
2553 u32 dma_qm_err_cfg, irq_handler_offset;
2554
2555 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2556
2557 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2558 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2559 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2560 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2561 so_base_en_lo = lower_32_bits(CFG_BASE +
2562 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2563 so_base_en_hi = upper_32_bits(CFG_BASE +
2564 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2565 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2566 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2567 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2568 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2569 so_base_ws_lo = lower_32_bits(CFG_BASE +
2570 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2571 so_base_ws_hi = upper_32_bits(CFG_BASE +
2572 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2573
2574 q_off = dma_qm_offset + qman_id * 4;
2575
2576 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2577 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2578
2579 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
2580 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2581 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2582
2583 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
2584 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2585 QMAN_LDMA_SRC_OFFSET);
2586 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2587 QMAN_LDMA_DST_OFFSET);
2588
2589 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2590 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2591 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2592 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2593 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2594 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2595 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2596 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2597
2598 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);
2599
2600 /* The following configuration is needed only once per QMAN */
2601 if (qman_id == 0) {
2602 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2603 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2604 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2605
2606 /* Configure RAZWI IRQ */
2607 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2608 if (hdev->stop_on_err)
2609 dma_qm_err_cfg |=
2610 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2611
2612 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2613
2614 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2615 lower_32_bits(CFG_BASE + irq_handler_offset));
2616 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2617 upper_32_bits(CFG_BASE + irq_handler_offset));
2618
2619 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2620 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2621 dma_id);
2622
2623 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2624 QM_ARB_ERR_MSG_EN_MASK);
2625
2626 /* Set timeout to maximum */
2627 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2628
2629 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2630 QMAN_EXTERNAL_MAKE_TRUSTED);
2631
2632 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2633 }
2634 }
2635
gaudi_init_dma_core(struct hl_device * hdev,int dma_id)2636 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2637 {
2638 struct cpu_dyn_regs *dyn_regs =
2639 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2640 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2641 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2642 u32 irq_handler_offset;
2643
2644 /* Set to maximum possible according to physical size */
2645 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2646 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2647
2648 /* WA for H/W bug H3-2116 */
2649 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);
2650
2651 /* STOP_ON bit implies no completion to operation in case of RAZWI */
2652 if (hdev->stop_on_err)
2653 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2654
2655 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2656
2657 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2658 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2659 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
2660
2661 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2662 lower_32_bits(CFG_BASE + irq_handler_offset));
2663 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2664 upper_32_bits(CFG_BASE + irq_handler_offset));
2665
2666 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2667 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2668 WREG32(mmDMA0_CORE_PROT + dma_offset,
2669 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2670 /* If the channel is secured, it should be in MMU bypass mode */
2671 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2672 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2673 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2674 }
2675
gaudi_enable_qman(struct hl_device * hdev,int dma_id,u32 enable_mask)2676 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2677 u32 enable_mask)
2678 {
2679 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2680
2681 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2682 }
2683
gaudi_init_pci_dma_qmans(struct hl_device * hdev)2684 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2685 {
2686 struct gaudi_device *gaudi = hdev->asic_specific;
2687 struct hl_hw_queue *q;
2688 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2689
2690 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2691 return;
2692
2693 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2694 dma_id = gaudi_dma_assignment[i];
2695 /*
2696 * For queues after the CPU Q need to add 1 to get the correct
2697 * queue. In addition, need to add the CPU EQ and NIC IRQs in
2698 * order to get the correct MSI register.
2699 */
2700 if (dma_id > 1) {
2701 cpu_skip = 1;
2702 nic_skip = NIC_NUMBER_OF_ENGINES;
2703 } else {
2704 cpu_skip = 0;
2705 nic_skip = 0;
2706 }
2707
2708 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2709 q_idx = 4 * dma_id + j + cpu_skip;
2710 q = &hdev->kernel_queues[q_idx];
2711 q->cq_id = cq_id++;
2712 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2713 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2714 q->bus_address);
2715 }
2716
2717 gaudi_init_dma_core(hdev, dma_id);
2718
2719 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2720 }
2721
2722 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2723 }
2724
gaudi_init_hbm_dma_qman(struct hl_device * hdev,int dma_id,int qman_id,u64 qman_base_addr)2725 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2726 int qman_id, u64 qman_base_addr)
2727 {
2728 struct cpu_dyn_regs *dyn_regs =
2729 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2730 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2731 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2732 u32 dma_qm_err_cfg, irq_handler_offset;
2733 u32 q_off, dma_qm_offset;
2734
2735 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2736
2737 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2738 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2739 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2741 so_base_en_lo = lower_32_bits(CFG_BASE +
2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2743 so_base_en_hi = upper_32_bits(CFG_BASE +
2744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2745 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2746 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2747 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2749 so_base_ws_lo = lower_32_bits(CFG_BASE +
2750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2751 so_base_ws_hi = upper_32_bits(CFG_BASE +
2752 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2753
2754 q_off = dma_qm_offset + qman_id * 4;
2755
2756 if (qman_id < 4) {
2757 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2758 lower_32_bits(qman_base_addr));
2759 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2760 upper_32_bits(qman_base_addr));
2761
2762 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2763 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2764 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2765
2766 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2767 QMAN_CPDMA_SIZE_OFFSET);
2768 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2769 QMAN_CPDMA_SRC_OFFSET);
2770 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2771 QMAN_CPDMA_DST_OFFSET);
2772 } else {
2773 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2774 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2775 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
2776
2777 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2778 QMAN_LDMA_SIZE_OFFSET);
2779 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2780 QMAN_LDMA_SRC_OFFSET);
2781 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2782 QMAN_LDMA_DST_OFFSET);
2783
2784 /* Configure RAZWI IRQ */
2785 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2786 if (hdev->stop_on_err)
2787 dma_qm_err_cfg |=
2788 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2789
2790 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2791
2792 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2793 lower_32_bits(CFG_BASE + irq_handler_offset));
2794 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2795 upper_32_bits(CFG_BASE + irq_handler_offset));
2796
2797 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2798 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2799 dma_id);
2800
2801 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2802 QM_ARB_ERR_MSG_EN_MASK);
2803
2804 /* Set timeout to maximum */
2805 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);
2806
2807 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2808 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2809 QMAN_INTERNAL_MAKE_TRUSTED);
2810 }
2811
2812 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2813 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2814 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2815 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2816
2817 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
2818 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
2819 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
2820 mtr_base_ws_lo);
2821 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
2822 mtr_base_ws_hi);
2823 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
2824 so_base_ws_lo);
2825 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
2826 so_base_ws_hi);
2827 }
2828 }
2829
gaudi_init_hbm_dma_qmans(struct hl_device * hdev)2830 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2831 {
2832 struct gaudi_device *gaudi = hdev->asic_specific;
2833 struct gaudi_internal_qman_info *q;
2834 u64 qman_base_addr;
2835 int i, j, dma_id, internal_q_index;
2836
2837 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2838 return;
2839
2840 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2841 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2842
2843 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2844 /*
2845 * Add the CPU queue in order to get the correct queue
2846 * number as all internal queue are placed after it
2847 */
2848 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2849
2850 q = &gaudi->internal_qmans[internal_q_index];
2851 qman_base_addr = (u64) q->pq_dma_addr;
2852 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2853 qman_base_addr);
2854 }
2855
2856 /* Initializing lower CP for HBM DMA QMAN */
2857 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2858
2859 gaudi_init_dma_core(hdev, dma_id);
2860
2861 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2862 }
2863
2864 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2865 }
2866
gaudi_init_mme_qman(struct hl_device * hdev,u32 mme_offset,int qman_id,u64 qman_base_addr)2867 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2868 int qman_id, u64 qman_base_addr)
2869 {
2870 struct cpu_dyn_regs *dyn_regs =
2871 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2872 u32 mtr_base_lo, mtr_base_hi;
2873 u32 so_base_lo, so_base_hi;
2874 u32 irq_handler_offset;
2875 u32 q_off, mme_id;
2876 u32 mme_qm_err_cfg;
2877
2878 mtr_base_lo = lower_32_bits(CFG_BASE +
2879 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2880 mtr_base_hi = upper_32_bits(CFG_BASE +
2881 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2882 so_base_lo = lower_32_bits(CFG_BASE +
2883 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2884 so_base_hi = upper_32_bits(CFG_BASE +
2885 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2886
2887 q_off = mme_offset + qman_id * 4;
2888
2889 if (qman_id < 4) {
2890 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2891 lower_32_bits(qman_base_addr));
2892 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2893 upper_32_bits(qman_base_addr));
2894
2895 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2896 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2897 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2898
2899 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2900 QMAN_CPDMA_SIZE_OFFSET);
2901 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2902 QMAN_CPDMA_SRC_OFFSET);
2903 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2904 QMAN_CPDMA_DST_OFFSET);
2905 } else {
2906 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
2907 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
2908 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
2909
2910 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
2911 QMAN_LDMA_SIZE_OFFSET);
2912 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
2913 QMAN_LDMA_SRC_OFFSET);
2914 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
2915 QMAN_LDMA_DST_OFFSET);
2916
2917 /* Configure RAZWI IRQ */
2918 mme_id = mme_offset /
2919 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;
2920
2921 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2922 if (hdev->stop_on_err)
2923 mme_qm_err_cfg |=
2924 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2925
2926 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2927
2928 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2929 lower_32_bits(CFG_BASE + irq_handler_offset));
2930 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2931 upper_32_bits(CFG_BASE + irq_handler_offset));
2932
2933 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2934 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2935 mme_id);
2936
2937 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2938 QM_ARB_ERR_MSG_EN_MASK);
2939
2940 /* Set timeout to maximum */
2941 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);
2942
2943 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2944 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2945 QMAN_INTERNAL_MAKE_TRUSTED);
2946 }
2947
2948 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2949 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2950 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2951 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2952 }
2953
gaudi_init_mme_qmans(struct hl_device * hdev)2954 static void gaudi_init_mme_qmans(struct hl_device *hdev)
2955 {
2956 struct gaudi_device *gaudi = hdev->asic_specific;
2957 struct gaudi_internal_qman_info *q;
2958 u64 qman_base_addr;
2959 u32 mme_offset;
2960 int i, internal_q_index;
2961
2962 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2963 return;
2964
2965 /*
2966 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2967 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2968 */
2969
2970 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2971
2972 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2973 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2974 q = &gaudi->internal_qmans[internal_q_index];
2975 qman_base_addr = (u64) q->pq_dma_addr;
2976 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2977 qman_base_addr);
2978 if (i == 3)
2979 mme_offset = 0;
2980 }
2981
2982 /* Initializing lower CP for MME QMANs */
2983 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2984 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2985 gaudi_init_mme_qman(hdev, 0, 4, 0);
2986
2987 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2988 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2989
2990 gaudi->hw_cap_initialized |= HW_CAP_MME;
2991 }
2992
gaudi_init_tpc_qman(struct hl_device * hdev,u32 tpc_offset,int qman_id,u64 qman_base_addr)2993 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2994 int qman_id, u64 qman_base_addr)
2995 {
2996 struct cpu_dyn_regs *dyn_regs =
2997 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
2998 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2999 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3000 u32 tpc_qm_err_cfg, irq_handler_offset;
3001 u32 q_off, tpc_id;
3002
3003 mtr_base_en_lo = lower_32_bits(CFG_BASE +
3004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3005 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3007 so_base_en_lo = lower_32_bits(CFG_BASE +
3008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3009 so_base_en_hi = upper_32_bits(CFG_BASE +
3010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3011 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
3012 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3013 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3014 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3015 so_base_ws_lo = lower_32_bits(CFG_BASE +
3016 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3017 so_base_ws_hi = upper_32_bits(CFG_BASE +
3018 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3019
3020 q_off = tpc_offset + qman_id * 4;
3021
3022 tpc_id = tpc_offset /
3023 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
3024
3025 if (qman_id < 4) {
3026 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
3027 lower_32_bits(qman_base_addr));
3028 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
3029 upper_32_bits(qman_base_addr));
3030
3031 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
3032 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
3033 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
3034
3035 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3036 QMAN_CPDMA_SIZE_OFFSET);
3037 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3038 QMAN_CPDMA_SRC_OFFSET);
3039 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3040 QMAN_CPDMA_DST_OFFSET);
3041 } else {
3042 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3043 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3044 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
3045
3046 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3047 QMAN_LDMA_SIZE_OFFSET);
3048 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3049 QMAN_LDMA_SRC_OFFSET);
3050 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3051 QMAN_LDMA_DST_OFFSET);
3052
3053 /* Configure RAZWI IRQ */
3054 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3055 if (hdev->stop_on_err)
3056 tpc_qm_err_cfg |=
3057 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3058
3059 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
3060
3061 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
3062 lower_32_bits(CFG_BASE + irq_handler_offset));
3063 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
3064 upper_32_bits(CFG_BASE + irq_handler_offset));
3065
3066 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
3067 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
3068 tpc_id);
3069
3070 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
3071 QM_ARB_ERR_MSG_EN_MASK);
3072
3073 /* Set timeout to maximum */
3074 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);
3075
3076 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
3077 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
3078 QMAN_INTERNAL_MAKE_TRUSTED);
3079 }
3080
3081 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3082 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3083 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3084 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3085
3086 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
3087 if (tpc_id == 6) {
3088 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
3089 mtr_base_ws_lo);
3090 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
3091 mtr_base_ws_hi);
3092 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
3093 so_base_ws_lo);
3094 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
3095 so_base_ws_hi);
3096 }
3097 }
3098
gaudi_init_tpc_qmans(struct hl_device * hdev)3099 static void gaudi_init_tpc_qmans(struct hl_device *hdev)
3100 {
3101 struct gaudi_device *gaudi = hdev->asic_specific;
3102 struct gaudi_internal_qman_info *q;
3103 u64 qman_base_addr;
3104 u32 so_base_hi, tpc_offset = 0;
3105 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
3106 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
3107 int i, tpc_id, internal_q_index;
3108
3109 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
3110 return;
3111
3112 so_base_hi = upper_32_bits(CFG_BASE +
3113 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3114
3115 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3116 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3117 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
3118 tpc_id * QMAN_STREAMS + i;
3119 q = &gaudi->internal_qmans[internal_q_index];
3120 qman_base_addr = (u64) q->pq_dma_addr;
3121 gaudi_init_tpc_qman(hdev, tpc_offset, i,
3122 qman_base_addr);
3123
3124 if (i == 3) {
3125 /* Initializing lower CP for TPC QMAN */
3126 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
3127
3128 /* Enable the QMAN and TPC channel */
3129 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
3130 QMAN_TPC_ENABLE);
3131 }
3132 }
3133
3134 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
3135 so_base_hi);
3136
3137 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3138
3139 gaudi->hw_cap_initialized |=
3140 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
3141 }
3142 }
3143
gaudi_init_nic_qman(struct hl_device * hdev,u32 nic_offset,int qman_id,u64 qman_base_addr,int nic_id)3144 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
3145 int qman_id, u64 qman_base_addr, int nic_id)
3146 {
3147 struct cpu_dyn_regs *dyn_regs =
3148 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3149 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
3150 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
3151 u32 nic_qm_err_cfg, irq_handler_offset;
3152 u32 q_off;
3153
3154 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3155 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3156 mtr_base_en_hi = upper_32_bits(CFG_BASE +
3157 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3158 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3159 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3160 so_base_en_hi = upper_32_bits(CFG_BASE +
3161 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
3162 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3163 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3164 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
3165 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
3166 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
3167 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3168 so_base_ws_hi = upper_32_bits(CFG_BASE +
3169 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
3170
3171 q_off = nic_offset + qman_id * 4;
3172
3173 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
3174 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));
3175
3176 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
3177 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
3178 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);
3179
3180 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
3181 QMAN_LDMA_SIZE_OFFSET);
3182 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
3183 QMAN_LDMA_SRC_OFFSET);
3184 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
3185 QMAN_LDMA_DST_OFFSET);
3186
3187 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
3188 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
3189 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
3190 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
3191
3192 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
3193 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
3194 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
3195 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
3196 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
3197
3198 if (qman_id == 0) {
3199 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
3200 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3201 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
3202
3203 /* Configure RAZWI IRQ */
3204 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
3205 if (hdev->stop_on_err)
3206 nic_qm_err_cfg |=
3207 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
3208
3209 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);
3210
3211 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
3212 lower_32_bits(CFG_BASE + irq_handler_offset));
3213 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
3214 upper_32_bits(CFG_BASE + irq_handler_offset));
3215
3216 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
3217 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
3218 nic_id);
3219
3220 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
3221 QM_ARB_ERR_MSG_EN_MASK);
3222
3223 /* Set timeout to maximum */
3224 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);
3225
3226 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
3227 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
3228 QMAN_INTERNAL_MAKE_TRUSTED);
3229 }
3230 }
3231
gaudi_init_nic_qmans(struct hl_device * hdev)3232 static void gaudi_init_nic_qmans(struct hl_device *hdev)
3233 {
3234 struct gaudi_device *gaudi = hdev->asic_specific;
3235 struct gaudi_internal_qman_info *q;
3236 u64 qman_base_addr;
3237 u32 nic_offset = 0;
3238 u32 nic_delta_between_qmans =
3239 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3240 u32 nic_delta_between_nics =
3241 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3242 int i, nic_id, internal_q_index;
3243
3244 if (!hdev->nic_ports_mask)
3245 return;
3246
3247 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
3248 return;
3249
3250 dev_dbg(hdev->dev, "Initializing NIC QMANs\n");
3251
3252 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3253 if (!(hdev->nic_ports_mask & (1 << nic_id))) {
3254 nic_offset += nic_delta_between_qmans;
3255 if (nic_id & 1) {
3256 nic_offset -= (nic_delta_between_qmans * 2);
3257 nic_offset += nic_delta_between_nics;
3258 }
3259 continue;
3260 }
3261
3262 for (i = 0 ; i < QMAN_STREAMS ; i++) {
3263 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
3264 nic_id * QMAN_STREAMS + i;
3265 q = &gaudi->internal_qmans[internal_q_index];
3266 qman_base_addr = (u64) q->pq_dma_addr;
3267 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
3268 qman_base_addr, nic_id);
3269 }
3270
3271 /* Enable the QMAN */
3272 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);
3273
3274 nic_offset += nic_delta_between_qmans;
3275 if (nic_id & 1) {
3276 nic_offset -= (nic_delta_between_qmans * 2);
3277 nic_offset += nic_delta_between_nics;
3278 }
3279
3280 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
3281 }
3282 }
3283
gaudi_disable_pci_dma_qmans(struct hl_device * hdev)3284 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
3285 {
3286 struct gaudi_device *gaudi = hdev->asic_specific;
3287
3288 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3289 return;
3290
3291 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
3292 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
3293 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
3294 }
3295
gaudi_disable_hbm_dma_qmans(struct hl_device * hdev)3296 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
3297 {
3298 struct gaudi_device *gaudi = hdev->asic_specific;
3299
3300 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3301 return;
3302
3303 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
3304 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
3305 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
3306 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
3307 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
3308 }
3309
gaudi_disable_mme_qmans(struct hl_device * hdev)3310 static void gaudi_disable_mme_qmans(struct hl_device *hdev)
3311 {
3312 struct gaudi_device *gaudi = hdev->asic_specific;
3313
3314 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3315 return;
3316
3317 WREG32(mmMME2_QM_GLBL_CFG0, 0);
3318 WREG32(mmMME0_QM_GLBL_CFG0, 0);
3319 }
3320
gaudi_disable_tpc_qmans(struct hl_device * hdev)3321 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3322 {
3323 struct gaudi_device *gaudi = hdev->asic_specific;
3324 u32 tpc_offset = 0;
3325 int tpc_id;
3326
3327 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3328 return;
3329
3330 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3331 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3332 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3333 }
3334 }
3335
gaudi_disable_nic_qmans(struct hl_device * hdev)3336 static void gaudi_disable_nic_qmans(struct hl_device *hdev)
3337 {
3338 struct gaudi_device *gaudi = hdev->asic_specific;
3339 u32 nic_mask, nic_offset = 0;
3340 u32 nic_delta_between_qmans =
3341 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3342 u32 nic_delta_between_nics =
3343 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
3344 int nic_id;
3345
3346 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
3347 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);
3348
3349 if (gaudi->hw_cap_initialized & nic_mask)
3350 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);
3351
3352 nic_offset += nic_delta_between_qmans;
3353 if (nic_id & 1) {
3354 nic_offset -= (nic_delta_between_qmans * 2);
3355 nic_offset += nic_delta_between_nics;
3356 }
3357 }
3358 }
3359
gaudi_stop_pci_dma_qmans(struct hl_device * hdev)3360 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
3361 {
3362 struct gaudi_device *gaudi = hdev->asic_specific;
3363
3364 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3365 return;
3366
3367 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
3368 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3369 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3370 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3371 }
3372
gaudi_stop_hbm_dma_qmans(struct hl_device * hdev)3373 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
3374 {
3375 struct gaudi_device *gaudi = hdev->asic_specific;
3376
3377 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3378 return;
3379
3380 /* Stop CPs of HBM DMA QMANs */
3381
3382 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3383 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3384 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3385 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3386 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3387 }
3388
gaudi_stop_mme_qmans(struct hl_device * hdev)3389 static void gaudi_stop_mme_qmans(struct hl_device *hdev)
3390 {
3391 struct gaudi_device *gaudi = hdev->asic_specific;
3392
3393 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3394 return;
3395
3396 /* Stop CPs of MME QMANs */
3397 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3398 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3399 }
3400
gaudi_stop_tpc_qmans(struct hl_device * hdev)3401 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3402 {
3403 struct gaudi_device *gaudi = hdev->asic_specific;
3404
3405 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3406 return;
3407
3408 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3416 }
3417
gaudi_stop_nic_qmans(struct hl_device * hdev)3418 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3419 {
3420 struct gaudi_device *gaudi = hdev->asic_specific;
3421
3422 /* Stop upper CPs of QMANs */
3423
3424 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3425 WREG32(mmNIC0_QM0_GLBL_CFG1,
3426 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3427 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3428 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3429
3430 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3431 WREG32(mmNIC0_QM1_GLBL_CFG1,
3432 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3433 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3434 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3435
3436 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3437 WREG32(mmNIC1_QM0_GLBL_CFG1,
3438 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3439 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3440 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3441
3442 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3443 WREG32(mmNIC1_QM1_GLBL_CFG1,
3444 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3445 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3446 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3447
3448 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3449 WREG32(mmNIC2_QM0_GLBL_CFG1,
3450 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3451 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3452 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3453
3454 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3455 WREG32(mmNIC2_QM1_GLBL_CFG1,
3456 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3457 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3458 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3459
3460 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3461 WREG32(mmNIC3_QM0_GLBL_CFG1,
3462 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3463 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3464 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3465
3466 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3467 WREG32(mmNIC3_QM1_GLBL_CFG1,
3468 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3469 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3470 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3471
3472 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3473 WREG32(mmNIC4_QM0_GLBL_CFG1,
3474 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3475 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3476 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3477
3478 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3479 WREG32(mmNIC4_QM1_GLBL_CFG1,
3480 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3481 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3482 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3483 }
3484
gaudi_pci_dma_stall(struct hl_device * hdev)3485 static void gaudi_pci_dma_stall(struct hl_device *hdev)
3486 {
3487 struct gaudi_device *gaudi = hdev->asic_specific;
3488
3489 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
3490 return;
3491
3492 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3493 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3494 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3495 }
3496
gaudi_hbm_dma_stall(struct hl_device * hdev)3497 static void gaudi_hbm_dma_stall(struct hl_device *hdev)
3498 {
3499 struct gaudi_device *gaudi = hdev->asic_specific;
3500
3501 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
3502 return;
3503
3504 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3505 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3506 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3507 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3508 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
3509 }
3510
gaudi_mme_stall(struct hl_device * hdev)3511 static void gaudi_mme_stall(struct hl_device *hdev)
3512 {
3513 struct gaudi_device *gaudi = hdev->asic_specific;
3514
3515 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3516 return;
3517
3518 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3519 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3521 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3523 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3525 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3527 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3529 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3531 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3533 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3535 }
3536
gaudi_tpc_stall(struct hl_device * hdev)3537 static void gaudi_tpc_stall(struct hl_device *hdev)
3538 {
3539 struct gaudi_device *gaudi = hdev->asic_specific;
3540
3541 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3542 return;
3543
3544 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3552 }
3553
gaudi_disable_clock_gating(struct hl_device * hdev)3554 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3555 {
3556 u32 qman_offset;
3557 int i;
3558
3559 if (hdev->asic_prop.fw_security_enabled)
3560 return;
3561
3562 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3563 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3564 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3565
3566 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3567 }
3568
3569 WREG32(mmMME0_QM_CGM_CFG, 0);
3570 WREG32(mmMME0_QM_CGM_CFG1, 0);
3571 WREG32(mmMME2_QM_CGM_CFG, 0);
3572 WREG32(mmMME2_QM_CGM_CFG1, 0);
3573
3574 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3575 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3576 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3577
3578 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3579 }
3580 }
3581
gaudi_enable_timestamp(struct hl_device * hdev)3582 static void gaudi_enable_timestamp(struct hl_device *hdev)
3583 {
3584 /* Disable the timestamp counter */
3585 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3586
3587 /* Zero the lower/upper parts of the 64-bit counter */
3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3589 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3590
3591 /* Enable the counter */
3592 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3593 }
3594
gaudi_disable_timestamp(struct hl_device * hdev)3595 static void gaudi_disable_timestamp(struct hl_device *hdev)
3596 {
3597 /* Disable the timestamp counter */
3598 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3599 }
3600
gaudi_halt_engines(struct hl_device * hdev,bool hard_reset,bool fw_reset)3601 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3602 {
3603 u32 wait_timeout_ms;
3604
3605 if (hdev->pldm)
3606 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3607 else
3608 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3609
3610 if (fw_reset)
3611 goto skip_engines;
3612
3613 gaudi_stop_nic_qmans(hdev);
3614 gaudi_stop_mme_qmans(hdev);
3615 gaudi_stop_tpc_qmans(hdev);
3616 gaudi_stop_hbm_dma_qmans(hdev);
3617 gaudi_stop_pci_dma_qmans(hdev);
3618
3619 msleep(wait_timeout_ms);
3620
3621 gaudi_pci_dma_stall(hdev);
3622 gaudi_hbm_dma_stall(hdev);
3623 gaudi_tpc_stall(hdev);
3624 gaudi_mme_stall(hdev);
3625
3626 msleep(wait_timeout_ms);
3627
3628 gaudi_disable_nic_qmans(hdev);
3629 gaudi_disable_mme_qmans(hdev);
3630 gaudi_disable_tpc_qmans(hdev);
3631 gaudi_disable_hbm_dma_qmans(hdev);
3632 gaudi_disable_pci_dma_qmans(hdev);
3633
3634 gaudi_disable_timestamp(hdev);
3635
3636 skip_engines:
3637 gaudi_disable_msi(hdev);
3638 }
3639
gaudi_mmu_init(struct hl_device * hdev)3640 static int gaudi_mmu_init(struct hl_device *hdev)
3641 {
3642 struct asic_fixed_properties *prop = &hdev->asic_prop;
3643 struct gaudi_device *gaudi = hdev->asic_specific;
3644 u64 hop0_addr;
3645 int rc, i;
3646
3647 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3648 return 0;
3649
3650 for (i = 0 ; i < prop->max_asid ; i++) {
3651 hop0_addr = prop->mmu_pgt_addr +
3652 (i * prop->dmmu.hop_table_size);
3653
3654 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3655 if (rc) {
3656 dev_err(hdev->dev,
3657 "failed to set hop0 addr for asid %d\n", i);
3658 return rc;
3659 }
3660 }
3661
3662 /* init MMU cache manage page */
3663 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3664 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3665
3666 /* mem cache invalidation */
3667 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3668
3669 rc = hl_mmu_invalidate_cache(hdev, true, 0);
3670 if (rc)
3671 return rc;
3672
3673 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3674 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3675
3676 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3677
3678 /*
3679 * The H/W expects the first PI after init to be 1. After wraparound
3680 * we'll write 0.
3681 */
3682 gaudi->mmu_cache_inv_pi = 1;
3683
3684 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3685
3686 return 0;
3687 }
3688
gaudi_load_firmware_to_device(struct hl_device * hdev)3689 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3690 {
3691 void __iomem *dst;
3692
3693 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3694
3695 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3696 }
3697
gaudi_load_boot_fit_to_device(struct hl_device * hdev)3698 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3699 {
3700 void __iomem *dst;
3701
3702 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3703
3704 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3705 }
3706
gaudi_init_dynamic_firmware_loader(struct hl_device * hdev)3707 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3708 {
3709 struct dynamic_fw_load_mgr *dynamic_loader;
3710 struct cpu_dyn_regs *dyn_regs;
3711
3712 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3713
3714 /*
3715 * here we update initial values for few specific dynamic regs (as
3716 * before reading the first descriptor from FW those value has to be
3717 * hard-coded) in later stages of the protocol those values will be
3718 * updated automatically by reading the FW descriptor so data there
3719 * will always be up-to-date
3720 */
3721 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3722 dyn_regs->kmd_msg_to_cpu =
3723 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3724 dyn_regs->cpu_cmd_status_to_host =
3725 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3726
3727 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3728 }
3729
gaudi_init_static_firmware_loader(struct hl_device * hdev)3730 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3731 {
3732 struct static_fw_load_mgr *static_loader;
3733
3734 static_loader = &hdev->fw_loader.static_loader;
3735
3736 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3738 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3739 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3740 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3741 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3742 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3743 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3744 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3745 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3746 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3747 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3748 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3749 GAUDI_PLDM_RESET_WAIT_MSEC :
3750 GAUDI_CPU_RESET_WAIT_MSEC;
3751 }
3752
gaudi_init_firmware_preload_params(struct hl_device * hdev)3753 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3754 {
3755 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3756
3757 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3758 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3759 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3760 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3761 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3762 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3763 }
3764
gaudi_init_firmware_loader(struct hl_device * hdev)3765 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3766 {
3767 struct asic_fixed_properties *prop = &hdev->asic_prop;
3768 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3769
3770 /* fill common fields */
3771 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3772 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3773 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3774 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3775 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3776 fw_loader->skip_bmc = !hdev->bmc_enable;
3777 fw_loader->sram_bar_id = SRAM_BAR_ID;
3778 fw_loader->dram_bar_id = HBM_BAR_ID;
3779
3780 if (prop->dynamic_fw_load)
3781 gaudi_init_dynamic_firmware_loader(hdev);
3782 else
3783 gaudi_init_static_firmware_loader(hdev);
3784 }
3785
gaudi_init_cpu(struct hl_device * hdev)3786 static int gaudi_init_cpu(struct hl_device *hdev)
3787 {
3788 struct gaudi_device *gaudi = hdev->asic_specific;
3789 int rc;
3790
3791 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3792 return 0;
3793
3794 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3795 return 0;
3796
3797 /*
3798 * The device CPU works with 40 bits addresses.
3799 * This register sets the extension to 50 bits.
3800 */
3801 if (!hdev->asic_prop.fw_security_enabled)
3802 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3803
3804 rc = hl_fw_init_cpu(hdev);
3805
3806 if (rc)
3807 return rc;
3808
3809 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3810
3811 return 0;
3812 }
3813
gaudi_init_cpu_queues(struct hl_device * hdev,u32 cpu_timeout)3814 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3815 {
3816 struct cpu_dyn_regs *dyn_regs =
3817 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3818 struct asic_fixed_properties *prop = &hdev->asic_prop;
3819 struct gaudi_device *gaudi = hdev->asic_specific;
3820 u32 status, irq_handler_offset;
3821 struct hl_eq *eq;
3822 struct hl_hw_queue *cpu_pq =
3823 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3824 int err;
3825
3826 if (!hdev->cpu_queues_enable)
3827 return 0;
3828
3829 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3830 return 0;
3831
3832 eq = &hdev->event_queue;
3833
3834 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3835 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3836
3837 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3838 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3839
3840 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3841 lower_32_bits(hdev->cpu_accessible_dma_address));
3842 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3843 upper_32_bits(hdev->cpu_accessible_dma_address));
3844
3845 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3846 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3847 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3848
3849 /* Used for EQ CI */
3850 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3851
3852 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3853
3854 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3855
3856 irq_handler_offset = prop->gic_interrupts_enable ?
3857 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3858 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3859
3860 WREG32(irq_handler_offset,
3861 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3862
3863 err = hl_poll_timeout(
3864 hdev,
3865 mmCPU_IF_QUEUE_INIT,
3866 status,
3867 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3868 1000,
3869 cpu_timeout);
3870
3871 if (err) {
3872 dev_err(hdev->dev,
3873 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3874 return -EIO;
3875 }
3876
3877 /* update FW application security bits */
3878 if (prop->fw_cpu_boot_dev_sts0_valid)
3879 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3880 if (prop->fw_cpu_boot_dev_sts1_valid)
3881 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3882
3883 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3884 return 0;
3885 }
3886
gaudi_pre_hw_init(struct hl_device * hdev)3887 static void gaudi_pre_hw_init(struct hl_device *hdev)
3888 {
3889 /* Perform read from the device to make sure device is up */
3890 RREG32(mmHW_STATE);
3891
3892 if (!hdev->asic_prop.fw_security_enabled) {
3893 /* Set the access through PCI bars (Linux driver only) as
3894 * secured
3895 */
3896 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3897 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3898 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3899
3900 /* Perform read to flush the waiting writes to ensure
3901 * configuration was set in the device
3902 */
3903 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3904 }
3905
3906 /*
3907 * Let's mark in the H/W that we have reached this point. We check
3908 * this value in the reset_before_init function to understand whether
3909 * we need to reset the chip before doing H/W init. This register is
3910 * cleared by the H/W upon H/W reset
3911 */
3912 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3913 }
3914
gaudi_hw_init(struct hl_device * hdev)3915 static int gaudi_hw_init(struct hl_device *hdev)
3916 {
3917 struct gaudi_device *gaudi = hdev->asic_specific;
3918 int rc;
3919
3920 gaudi_pre_hw_init(hdev);
3921
3922 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3923 * So we set it here and if anyone tries to move it later to
3924 * a different address, there will be an error
3925 */
3926 if (hdev->asic_prop.iatu_done_by_fw)
3927 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3928
3929 /*
3930 * Before pushing u-boot/linux to device, need to set the hbm bar to
3931 * base address of dram
3932 */
3933 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3934 dev_err(hdev->dev,
3935 "failed to map HBM bar to DRAM base address\n");
3936 return -EIO;
3937 }
3938
3939 rc = gaudi_init_cpu(hdev);
3940 if (rc) {
3941 dev_err(hdev->dev, "failed to initialize CPU\n");
3942 return rc;
3943 }
3944
3945 /* In case the clock gating was enabled in preboot we need to disable
3946 * it here before touching the MME/TPC registers.
3947 */
3948 gaudi_disable_clock_gating(hdev);
3949
3950 /* SRAM scrambler must be initialized after CPU is running from HBM */
3951 gaudi_init_scrambler_sram(hdev);
3952
3953 /* This is here just in case we are working without CPU */
3954 gaudi_init_scrambler_hbm(hdev);
3955
3956 gaudi_init_golden_registers(hdev);
3957
3958 rc = gaudi_mmu_init(hdev);
3959 if (rc)
3960 return rc;
3961
3962 gaudi_init_security(hdev);
3963
3964 gaudi_init_pci_dma_qmans(hdev);
3965
3966 gaudi_init_hbm_dma_qmans(hdev);
3967
3968 gaudi_init_mme_qmans(hdev);
3969
3970 gaudi_init_tpc_qmans(hdev);
3971
3972 gaudi_init_nic_qmans(hdev);
3973
3974 gaudi_enable_timestamp(hdev);
3975
3976 /* MSI must be enabled before CPU queues and NIC are initialized */
3977 rc = gaudi_enable_msi(hdev);
3978 if (rc)
3979 goto disable_queues;
3980
3981 /* must be called after MSI was enabled */
3982 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3983 if (rc) {
3984 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3985 rc);
3986 goto disable_msi;
3987 }
3988
3989 /* Perform read from the device to flush all configuration */
3990 RREG32(mmHW_STATE);
3991
3992 return 0;
3993
3994 disable_msi:
3995 gaudi_disable_msi(hdev);
3996 disable_queues:
3997 gaudi_disable_mme_qmans(hdev);
3998 gaudi_disable_pci_dma_qmans(hdev);
3999
4000 return rc;
4001 }
4002
gaudi_hw_fini(struct hl_device * hdev,bool hard_reset,bool fw_reset)4003 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4004 {
4005 struct cpu_dyn_regs *dyn_regs =
4006 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4007 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4008 struct gaudi_device *gaudi = hdev->asic_specific;
4009 bool driver_performs_reset;
4010
4011 if (!hard_reset) {
4012 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4013 return 0;
4014 }
4015
4016 if (hdev->pldm) {
4017 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4018 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4019 } else {
4020 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4021 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4022 }
4023
4024 if (fw_reset) {
4025 dev_dbg(hdev->dev,
4026 "Firmware performs HARD reset, going to wait %dms\n",
4027 reset_timeout_ms);
4028
4029 goto skip_reset;
4030 }
4031
4032 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4033 !hdev->asic_prop.hard_reset_done_by_fw);
4034
4035 /* Set device to handle FLR by H/W as we will put the device CPU to
4036 * halt mode
4037 */
4038 if (driver_performs_reset)
4039 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4040 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4041
4042 /* If linux is loaded in the device CPU we need to communicate with it
4043 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4044 * registers in case of old F/Ws
4045 */
4046 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4047 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4048 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4049 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4050
4051 WREG32(irq_handler_offset,
4052 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4053
4054 /* This is a hail-mary attempt to revive the card in the small chance that the
4055 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4056 * In that case, triggering reset through GIC won't help. We need to trigger the
4057 * reset as if Linux wasn't loaded.
4058 *
4059 * We do it only if the reset cause was HB, because that would be the indication
4060 * of such an event.
4061 *
4062 * In case watchdog hasn't expired but we still got HB, then this won't do any
4063 * damage.
4064 */
4065 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4066 if (hdev->asic_prop.hard_reset_done_by_fw)
4067 hl_fw_ask_hard_reset_without_linux(hdev);
4068 else
4069 hl_fw_ask_halt_machine_without_linux(hdev);
4070 }
4071 } else {
4072 if (hdev->asic_prop.hard_reset_done_by_fw)
4073 hl_fw_ask_hard_reset_without_linux(hdev);
4074 else
4075 hl_fw_ask_halt_machine_without_linux(hdev);
4076 }
4077
4078 if (driver_performs_reset) {
4079
4080 /* Configure the reset registers. Must be done as early as
4081 * possible in case we fail during H/W initialization
4082 */
4083 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4084 (CFG_RST_H_DMA_MASK |
4085 CFG_RST_H_MME_MASK |
4086 CFG_RST_H_SM_MASK |
4087 CFG_RST_H_TPC_7_MASK));
4088
4089 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4090
4091 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4092 (CFG_RST_H_HBM_MASK |
4093 CFG_RST_H_TPC_7_MASK |
4094 CFG_RST_H_NIC_MASK |
4095 CFG_RST_H_SM_MASK |
4096 CFG_RST_H_DMA_MASK |
4097 CFG_RST_H_MME_MASK |
4098 CFG_RST_H_CPU_MASK |
4099 CFG_RST_H_MMU_MASK));
4100
4101 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4102 (CFG_RST_L_IF_MASK |
4103 CFG_RST_L_PSOC_MASK |
4104 CFG_RST_L_TPC_MASK));
4105
4106 msleep(cpu_timeout_ms);
4107
4108 /* Tell ASIC not to re-initialize PCIe */
4109 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4110
4111 /* Restart BTL/BLR upon hard-reset */
4112 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4113
4114 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4115 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4116
4117 dev_dbg(hdev->dev,
4118 "Issued HARD reset command, going to wait %dms\n",
4119 reset_timeout_ms);
4120 } else {
4121 dev_dbg(hdev->dev,
4122 "Firmware performs HARD reset, going to wait %dms\n",
4123 reset_timeout_ms);
4124 }
4125
4126 skip_reset:
4127 /*
4128 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4129 * itself is in reset. Need to wait until the reset is deasserted
4130 */
4131 msleep(reset_timeout_ms);
4132
4133 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4134 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4135 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4136 return -ETIMEDOUT;
4137 }
4138
4139 if (gaudi) {
4140 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4141 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4142 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4143 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4144 HW_CAP_HBM_SCRAMBLER);
4145
4146 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4147
4148 hdev->device_cpu_is_halted = false;
4149 }
4150 return 0;
4151 }
4152
gaudi_suspend(struct hl_device * hdev)4153 static int gaudi_suspend(struct hl_device *hdev)
4154 {
4155 return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4156 }
4157
gaudi_resume(struct hl_device * hdev)4158 static int gaudi_resume(struct hl_device *hdev)
4159 {
4160 return gaudi_init_iatu(hdev);
4161 }
4162
gaudi_mmap(struct hl_device * hdev,struct vm_area_struct * vma,void * cpu_addr,dma_addr_t dma_addr,size_t size)4163 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4164 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4165 {
4166 int rc;
4167
4168 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4169 VM_DONTCOPY | VM_NORESERVE);
4170
4171 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4172 (dma_addr - HOST_PHYS_BASE), size);
4173 if (rc)
4174 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4175
4176 return rc;
4177 }
4178
gaudi_ring_doorbell(struct hl_device * hdev,u32 hw_queue_id,u32 pi)4179 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4180 {
4181 struct cpu_dyn_regs *dyn_regs =
4182 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4183 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4184 struct gaudi_device *gaudi = hdev->asic_specific;
4185 bool invalid_queue = false;
4186 int dma_id;
4187
4188 switch (hw_queue_id) {
4189 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4190 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4191 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4192 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4193 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4194 break;
4195
4196 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4197 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4198 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4199 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4200 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4201 break;
4202
4203 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4204 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4205 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4206 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4207 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4208 break;
4209
4210 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4211 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4212 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4213 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4214 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4215 break;
4216
4217 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4218 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4219 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4220 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4221 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4222 break;
4223
4224 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4225 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4226 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4227 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4228 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4229 break;
4230
4231 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4232 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4233 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4234 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4235 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4236 break;
4237
4238 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4239 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4240 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4241 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4242 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4243 break;
4244
4245 case GAUDI_QUEUE_ID_CPU_PQ:
4246 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4247 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4248 else
4249 invalid_queue = true;
4250 break;
4251
4252 case GAUDI_QUEUE_ID_MME_0_0:
4253 db_reg_offset = mmMME2_QM_PQ_PI_0;
4254 break;
4255
4256 case GAUDI_QUEUE_ID_MME_0_1:
4257 db_reg_offset = mmMME2_QM_PQ_PI_1;
4258 break;
4259
4260 case GAUDI_QUEUE_ID_MME_0_2:
4261 db_reg_offset = mmMME2_QM_PQ_PI_2;
4262 break;
4263
4264 case GAUDI_QUEUE_ID_MME_0_3:
4265 db_reg_offset = mmMME2_QM_PQ_PI_3;
4266 break;
4267
4268 case GAUDI_QUEUE_ID_MME_1_0:
4269 db_reg_offset = mmMME0_QM_PQ_PI_0;
4270 break;
4271
4272 case GAUDI_QUEUE_ID_MME_1_1:
4273 db_reg_offset = mmMME0_QM_PQ_PI_1;
4274 break;
4275
4276 case GAUDI_QUEUE_ID_MME_1_2:
4277 db_reg_offset = mmMME0_QM_PQ_PI_2;
4278 break;
4279
4280 case GAUDI_QUEUE_ID_MME_1_3:
4281 db_reg_offset = mmMME0_QM_PQ_PI_3;
4282 break;
4283
4284 case GAUDI_QUEUE_ID_TPC_0_0:
4285 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4286 break;
4287
4288 case GAUDI_QUEUE_ID_TPC_0_1:
4289 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4290 break;
4291
4292 case GAUDI_QUEUE_ID_TPC_0_2:
4293 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4294 break;
4295
4296 case GAUDI_QUEUE_ID_TPC_0_3:
4297 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4298 break;
4299
4300 case GAUDI_QUEUE_ID_TPC_1_0:
4301 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4302 break;
4303
4304 case GAUDI_QUEUE_ID_TPC_1_1:
4305 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4306 break;
4307
4308 case GAUDI_QUEUE_ID_TPC_1_2:
4309 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4310 break;
4311
4312 case GAUDI_QUEUE_ID_TPC_1_3:
4313 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4314 break;
4315
4316 case GAUDI_QUEUE_ID_TPC_2_0:
4317 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4318 break;
4319
4320 case GAUDI_QUEUE_ID_TPC_2_1:
4321 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4322 break;
4323
4324 case GAUDI_QUEUE_ID_TPC_2_2:
4325 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4326 break;
4327
4328 case GAUDI_QUEUE_ID_TPC_2_3:
4329 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4330 break;
4331
4332 case GAUDI_QUEUE_ID_TPC_3_0:
4333 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4334 break;
4335
4336 case GAUDI_QUEUE_ID_TPC_3_1:
4337 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4338 break;
4339
4340 case GAUDI_QUEUE_ID_TPC_3_2:
4341 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4342 break;
4343
4344 case GAUDI_QUEUE_ID_TPC_3_3:
4345 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4346 break;
4347
4348 case GAUDI_QUEUE_ID_TPC_4_0:
4349 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4350 break;
4351
4352 case GAUDI_QUEUE_ID_TPC_4_1:
4353 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4354 break;
4355
4356 case GAUDI_QUEUE_ID_TPC_4_2:
4357 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4358 break;
4359
4360 case GAUDI_QUEUE_ID_TPC_4_3:
4361 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4362 break;
4363
4364 case GAUDI_QUEUE_ID_TPC_5_0:
4365 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4366 break;
4367
4368 case GAUDI_QUEUE_ID_TPC_5_1:
4369 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4370 break;
4371
4372 case GAUDI_QUEUE_ID_TPC_5_2:
4373 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4374 break;
4375
4376 case GAUDI_QUEUE_ID_TPC_5_3:
4377 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4378 break;
4379
4380 case GAUDI_QUEUE_ID_TPC_6_0:
4381 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4382 break;
4383
4384 case GAUDI_QUEUE_ID_TPC_6_1:
4385 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4386 break;
4387
4388 case GAUDI_QUEUE_ID_TPC_6_2:
4389 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4390 break;
4391
4392 case GAUDI_QUEUE_ID_TPC_6_3:
4393 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4394 break;
4395
4396 case GAUDI_QUEUE_ID_TPC_7_0:
4397 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4398 break;
4399
4400 case GAUDI_QUEUE_ID_TPC_7_1:
4401 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4402 break;
4403
4404 case GAUDI_QUEUE_ID_TPC_7_2:
4405 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4406 break;
4407
4408 case GAUDI_QUEUE_ID_TPC_7_3:
4409 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4410 break;
4411
4412 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4413 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4414 invalid_queue = true;
4415
4416 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4417 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4418 break;
4419
4420 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4421 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4422 invalid_queue = true;
4423
4424 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4425 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4426 break;
4427
4428 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4429 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4430 invalid_queue = true;
4431
4432 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4433 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4434 break;
4435
4436 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4437 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4438 invalid_queue = true;
4439
4440 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4441 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4442 break;
4443
4444 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4445 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4446 invalid_queue = true;
4447
4448 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4449 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4450 break;
4451
4452 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4453 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4454 invalid_queue = true;
4455
4456 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4457 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4458 break;
4459
4460 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4461 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4462 invalid_queue = true;
4463
4464 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4465 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4466 break;
4467
4468 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4469 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4470 invalid_queue = true;
4471
4472 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4473 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4474 break;
4475
4476 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4477 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4478 invalid_queue = true;
4479
4480 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4481 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4482 break;
4483
4484 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4485 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4486 invalid_queue = true;
4487
4488 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4489 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4490 break;
4491
4492 default:
4493 invalid_queue = true;
4494 }
4495
4496 if (invalid_queue) {
4497 /* Should never get here */
4498 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4499 hw_queue_id);
4500 return;
4501 }
4502
4503 db_value = pi;
4504
4505 /* ring the doorbell */
4506 WREG32(db_reg_offset, db_value);
4507
4508 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4509 /* make sure device CPU will read latest data from host */
4510 mb();
4511
4512 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4513 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4514 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4515
4516 WREG32(irq_handler_offset,
4517 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4518 }
4519 }
4520
gaudi_pqe_write(struct hl_device * hdev,__le64 * pqe,struct hl_bd * bd)4521 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
4522 struct hl_bd *bd)
4523 {
4524 __le64 *pbd = (__le64 *) bd;
4525
4526 /* The QMANs are on the host memory so a simple copy suffice */
4527 pqe[0] = pbd[0];
4528 pqe[1] = pbd[1];
4529 }
4530
gaudi_dma_alloc_coherent(struct hl_device * hdev,size_t size,dma_addr_t * dma_handle,gfp_t flags)4531 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
4532 dma_addr_t *dma_handle, gfp_t flags)
4533 {
4534 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
4535 dma_handle, flags);
4536
4537 /* Shift to the device's base physical address of host memory */
4538 if (kernel_addr)
4539 *dma_handle += HOST_PHYS_BASE;
4540
4541 return kernel_addr;
4542 }
4543
gaudi_dma_free_coherent(struct hl_device * hdev,size_t size,void * cpu_addr,dma_addr_t dma_handle)4544 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
4545 void *cpu_addr, dma_addr_t dma_handle)
4546 {
4547 /* Cancel the device's base physical address of host memory */
4548 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
4549
4550 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
4551 }
4552
gaudi_scrub_device_dram(struct hl_device * hdev,u64 val)4553 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
4554 {
4555 struct asic_fixed_properties *prop = &hdev->asic_prop;
4556 u64 cur_addr = prop->dram_user_base_address;
4557 u32 chunk_size, busy;
4558 int rc, dma_id;
4559
4560 while (cur_addr < prop->dram_end_address) {
4561 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4562 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4563
4564 chunk_size =
4565 min((u64)SZ_2G, prop->dram_end_address - cur_addr);
4566
4567 dev_dbg(hdev->dev,
4568 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
4569 cur_addr, cur_addr + chunk_size);
4570
4571 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
4572 lower_32_bits(val));
4573 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
4574 upper_32_bits(val));
4575 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
4576 lower_32_bits(cur_addr));
4577 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
4578 upper_32_bits(cur_addr));
4579 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
4580 chunk_size);
4581 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
4582 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
4583 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));
4584
4585 cur_addr += chunk_size;
4586
4587 if (cur_addr == prop->dram_end_address)
4588 break;
4589 }
4590
4591 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
4592 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
4593
4594 rc = hl_poll_timeout(
4595 hdev,
4596 mmDMA0_CORE_STS0 + dma_offset,
4597 busy,
4598 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
4599 1000,
4600 HBM_SCRUBBING_TIMEOUT_US);
4601
4602 if (rc) {
4603 dev_err(hdev->dev,
4604 "DMA Timeout during HBM scrubbing of DMA #%d\n",
4605 dma_id);
4606 return -EIO;
4607 }
4608 }
4609 }
4610
4611 return 0;
4612 }
4613
gaudi_scrub_device_mem(struct hl_device * hdev)4614 static int gaudi_scrub_device_mem(struct hl_device *hdev)
4615 {
4616 struct asic_fixed_properties *prop = &hdev->asic_prop;
4617 u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
4618 u64 addr, size, val = hdev->memory_scrub_val;
4619 ktime_t timeout;
4620 int rc = 0;
4621
4622 if (!hdev->memory_scrub)
4623 return 0;
4624
4625 timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
4626 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
4627 if (ktime_compare(ktime_get(), timeout) > 0) {
4628 dev_err(hdev->dev, "waiting for idle timeout\n");
4629 return -ETIMEDOUT;
4630 }
4631 usleep_range((1000 >> 2) + 1, 1000);
4632 }
4633
4634 /* Scrub SRAM */
4635 addr = prop->sram_user_base_address;
4636 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;
4637
4638 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
4639 addr, addr + size, val);
4640 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4641 if (rc) {
4642 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
4643 return rc;
4644 }
4645
4646 /* Scrub HBM using all DMA channels in parallel */
4647 rc = gaudi_scrub_device_dram(hdev, val);
4648 if (rc) {
4649 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
4650 return rc;
4651 }
4652
4653 return 0;
4654 }
4655
gaudi_get_int_queue_base(struct hl_device * hdev,u32 queue_id,dma_addr_t * dma_handle,u16 * queue_len)4656 static void *gaudi_get_int_queue_base(struct hl_device *hdev,
4657 u32 queue_id, dma_addr_t *dma_handle,
4658 u16 *queue_len)
4659 {
4660 struct gaudi_device *gaudi = hdev->asic_specific;
4661 struct gaudi_internal_qman_info *q;
4662
4663 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
4664 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
4665 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
4666 return NULL;
4667 }
4668
4669 q = &gaudi->internal_qmans[queue_id];
4670 *dma_handle = q->pq_dma_addr;
4671 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
4672
4673 return q->pq_kernel_addr;
4674 }
4675
gaudi_send_cpu_message(struct hl_device * hdev,u32 * msg,u16 len,u32 timeout,u64 * result)4676 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
4677 u16 len, u32 timeout, u64 *result)
4678 {
4679 struct gaudi_device *gaudi = hdev->asic_specific;
4680
4681 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
4682 if (result)
4683 *result = 0;
4684 return 0;
4685 }
4686
4687 if (!timeout)
4688 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC;
4689
4690 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
4691 timeout, result);
4692 }
4693
gaudi_test_queue(struct hl_device * hdev,u32 hw_queue_id)4694 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
4695 {
4696 struct packet_msg_prot *fence_pkt;
4697 dma_addr_t pkt_dma_addr;
4698 u32 fence_val, tmp, timeout_usec;
4699 dma_addr_t fence_dma_addr;
4700 u32 *fence_ptr;
4701 int rc;
4702
4703 if (hdev->pldm)
4704 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
4705 else
4706 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
4707
4708 fence_val = GAUDI_QMAN0_FENCE_VAL;
4709
4710 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
4711 if (!fence_ptr) {
4712 dev_err(hdev->dev,
4713 "Failed to allocate memory for H/W queue %d testing\n",
4714 hw_queue_id);
4715 return -ENOMEM;
4716 }
4717
4718 *fence_ptr = 0;
4719
4720 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
4721 &pkt_dma_addr);
4722 if (!fence_pkt) {
4723 dev_err(hdev->dev,
4724 "Failed to allocate packet for H/W queue %d testing\n",
4725 hw_queue_id);
4726 rc = -ENOMEM;
4727 goto free_fence_ptr;
4728 }
4729
4730 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
4731 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
4732 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
4733
4734 fence_pkt->ctl = cpu_to_le32(tmp);
4735 fence_pkt->value = cpu_to_le32(fence_val);
4736 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4737
4738 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
4739 sizeof(struct packet_msg_prot),
4740 pkt_dma_addr);
4741 if (rc) {
4742 dev_err(hdev->dev,
4743 "Failed to send fence packet to H/W queue %d\n",
4744 hw_queue_id);
4745 goto free_pkt;
4746 }
4747
4748 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
4749 1000, timeout_usec, true);
4750
4751 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
4752
4753 if (rc == -ETIMEDOUT) {
4754 dev_err(hdev->dev,
4755 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
4756 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
4757 rc = -EIO;
4758 }
4759
4760 free_pkt:
4761 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
4762 free_fence_ptr:
4763 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
4764 return rc;
4765 }
4766
gaudi_test_cpu_queue(struct hl_device * hdev)4767 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4768 {
4769 struct gaudi_device *gaudi = hdev->asic_specific;
4770
4771 /*
4772 * check capability here as send_cpu_message() won't update the result
4773 * value if no capability
4774 */
4775 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4776 return 0;
4777
4778 return hl_fw_test_cpu_queue(hdev);
4779 }
4780
gaudi_test_queues(struct hl_device * hdev)4781 static int gaudi_test_queues(struct hl_device *hdev)
4782 {
4783 int i, rc, ret_val = 0;
4784
4785 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4786 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4787 rc = gaudi_test_queue(hdev, i);
4788 if (rc)
4789 ret_val = -EINVAL;
4790 }
4791 }
4792
4793 rc = gaudi_test_cpu_queue(hdev);
4794 if (rc)
4795 ret_val = -EINVAL;
4796
4797 return ret_val;
4798 }
4799
gaudi_dma_pool_zalloc(struct hl_device * hdev,size_t size,gfp_t mem_flags,dma_addr_t * dma_handle)4800 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
4801 gfp_t mem_flags, dma_addr_t *dma_handle)
4802 {
4803 void *kernel_addr;
4804
4805 if (size > GAUDI_DMA_POOL_BLK_SIZE)
4806 return NULL;
4807
4808 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
4809
4810 /* Shift to the device's base physical address of host memory */
4811 if (kernel_addr)
4812 *dma_handle += HOST_PHYS_BASE;
4813
4814 return kernel_addr;
4815 }
4816
gaudi_dma_pool_free(struct hl_device * hdev,void * vaddr,dma_addr_t dma_addr)4817 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
4818 dma_addr_t dma_addr)
4819 {
4820 /* Cancel the device's base physical address of host memory */
4821 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
4822
4823 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
4824 }
4825
gaudi_cpu_accessible_dma_pool_alloc(struct hl_device * hdev,size_t size,dma_addr_t * dma_handle)4826 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
4827 size_t size, dma_addr_t *dma_handle)
4828 {
4829 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
4830 }
4831
gaudi_cpu_accessible_dma_pool_free(struct hl_device * hdev,size_t size,void * vaddr)4832 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
4833 size_t size, void *vaddr)
4834 {
4835 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
4836 }
4837
gaudi_get_dma_desc_list_size(struct hl_device * hdev,struct sg_table * sgt)4838 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
4839 {
4840 struct scatterlist *sg, *sg_next_iter;
4841 u32 count, dma_desc_cnt;
4842 u64 len, len_next;
4843 dma_addr_t addr, addr_next;
4844
4845 dma_desc_cnt = 0;
4846
4847 for_each_sgtable_dma_sg(sgt, sg, count) {
4848 len = sg_dma_len(sg);
4849 addr = sg_dma_address(sg);
4850
4851 if (len == 0)
4852 break;
4853
4854 while ((count + 1) < sgt->nents) {
4855 sg_next_iter = sg_next(sg);
4856 len_next = sg_dma_len(sg_next_iter);
4857 addr_next = sg_dma_address(sg_next_iter);
4858
4859 if (len_next == 0)
4860 break;
4861
4862 if ((addr + len == addr_next) &&
4863 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4864 len += len_next;
4865 count++;
4866 sg = sg_next_iter;
4867 } else {
4868 break;
4869 }
4870 }
4871
4872 dma_desc_cnt++;
4873 }
4874
4875 return dma_desc_cnt * sizeof(struct packet_lin_dma);
4876 }
4877
gaudi_pin_memory_before_cs(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_lin_dma * user_dma_pkt,u64 addr,enum dma_data_direction dir)4878 static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
4879 struct hl_cs_parser *parser,
4880 struct packet_lin_dma *user_dma_pkt,
4881 u64 addr, enum dma_data_direction dir)
4882 {
4883 struct hl_userptr *userptr;
4884 int rc;
4885
4886 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4887 parser->job_userptr_list, &userptr))
4888 goto already_pinned;
4889
4890 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
4891 if (!userptr)
4892 return -ENOMEM;
4893
4894 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
4895 userptr);
4896 if (rc)
4897 goto free_userptr;
4898
4899 list_add_tail(&userptr->job_node, parser->job_userptr_list);
4900
4901 rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
4902 if (rc) {
4903 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
4904 goto unpin_memory;
4905 }
4906
4907 userptr->dma_mapped = true;
4908 userptr->dir = dir;
4909
4910 already_pinned:
4911 parser->patched_cb_size +=
4912 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
4913
4914 return 0;
4915
4916 unpin_memory:
4917 list_del(&userptr->job_node);
4918 hl_unpin_host_memory(hdev, userptr);
4919 free_userptr:
4920 kfree(userptr);
4921 return rc;
4922 }
4923
gaudi_validate_dma_pkt_host(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_lin_dma * user_dma_pkt,bool src_in_host)4924 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
4925 struct hl_cs_parser *parser,
4926 struct packet_lin_dma *user_dma_pkt,
4927 bool src_in_host)
4928 {
4929 enum dma_data_direction dir;
4930 bool skip_host_mem_pin = false, user_memset;
4931 u64 addr;
4932 int rc = 0;
4933
4934 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
4935 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4936 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4937
4938 if (src_in_host) {
4939 if (user_memset)
4940 skip_host_mem_pin = true;
4941
4942 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
4943 dir = DMA_TO_DEVICE;
4944 addr = le64_to_cpu(user_dma_pkt->src_addr);
4945 } else {
4946 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4947 dir = DMA_FROM_DEVICE;
4948 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4949 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4950 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4951 }
4952
4953 if (skip_host_mem_pin)
4954 parser->patched_cb_size += sizeof(*user_dma_pkt);
4955 else
4956 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4957 addr, dir);
4958
4959 return rc;
4960 }
4961
gaudi_validate_dma_pkt_no_mmu(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_lin_dma * user_dma_pkt)4962 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4963 struct hl_cs_parser *parser,
4964 struct packet_lin_dma *user_dma_pkt)
4965 {
4966 bool src_in_host = false;
4967 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4968 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4969 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4970
4971 dev_dbg(hdev->dev, "DMA packet details:\n");
4972 dev_dbg(hdev->dev, "source == 0x%llx\n",
4973 le64_to_cpu(user_dma_pkt->src_addr));
4974 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4975 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4976
4977 /*
4978 * Special handling for DMA with size 0. Bypass all validations
4979 * because no transactions will be done except for WR_COMP, which
4980 * is not a security issue
4981 */
4982 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4983 parser->patched_cb_size += sizeof(*user_dma_pkt);
4984 return 0;
4985 }
4986
4987 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4988 src_in_host = true;
4989
4990 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4991 src_in_host);
4992 }
4993
gaudi_validate_load_and_exe_pkt(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_load_and_exe * user_pkt)4994 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
4995 struct hl_cs_parser *parser,
4996 struct packet_load_and_exe *user_pkt)
4997 {
4998 u32 cfg;
4999
5000 cfg = le32_to_cpu(user_pkt->cfg);
5001
5002 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5003 dev_err(hdev->dev,
5004 "User not allowed to use Load and Execute\n");
5005 return -EPERM;
5006 }
5007
5008 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5009
5010 return 0;
5011 }
5012
gaudi_validate_cb(struct hl_device * hdev,struct hl_cs_parser * parser,bool is_mmu)5013 static int gaudi_validate_cb(struct hl_device *hdev,
5014 struct hl_cs_parser *parser, bool is_mmu)
5015 {
5016 u32 cb_parsed_length = 0;
5017 int rc = 0;
5018
5019 parser->patched_cb_size = 0;
5020
5021 /* cb_user_size is more than 0 so loop will always be executed */
5022 while (cb_parsed_length < parser->user_cb_size) {
5023 enum packet_id pkt_id;
5024 u16 pkt_size;
5025 struct gaudi_packet *user_pkt;
5026
5027 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5028
5029 pkt_id = (enum packet_id) (
5030 (le64_to_cpu(user_pkt->header) &
5031 PACKET_HEADER_PACKET_ID_MASK) >>
5032 PACKET_HEADER_PACKET_ID_SHIFT);
5033
5034 if (!validate_packet_id(pkt_id)) {
5035 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5036 rc = -EINVAL;
5037 break;
5038 }
5039
5040 pkt_size = gaudi_packet_sizes[pkt_id];
5041 cb_parsed_length += pkt_size;
5042 if (cb_parsed_length > parser->user_cb_size) {
5043 dev_err(hdev->dev,
5044 "packet 0x%x is out of CB boundary\n", pkt_id);
5045 rc = -EINVAL;
5046 break;
5047 }
5048
5049 switch (pkt_id) {
5050 case PACKET_MSG_PROT:
5051 dev_err(hdev->dev,
5052 "User not allowed to use MSG_PROT\n");
5053 rc = -EPERM;
5054 break;
5055
5056 case PACKET_CP_DMA:
5057 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5058 rc = -EPERM;
5059 break;
5060
5061 case PACKET_STOP:
5062 dev_err(hdev->dev, "User not allowed to use STOP\n");
5063 rc = -EPERM;
5064 break;
5065
5066 case PACKET_WREG_BULK:
5067 dev_err(hdev->dev,
5068 "User not allowed to use WREG_BULK\n");
5069 rc = -EPERM;
5070 break;
5071
5072 case PACKET_LOAD_AND_EXE:
5073 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
5074 (struct packet_load_and_exe *) user_pkt);
5075 break;
5076
5077 case PACKET_LIN_DMA:
5078 parser->contains_dma_pkt = true;
5079 if (is_mmu)
5080 parser->patched_cb_size += pkt_size;
5081 else
5082 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
5083 (struct packet_lin_dma *) user_pkt);
5084 break;
5085
5086 case PACKET_WREG_32:
5087 case PACKET_MSG_LONG:
5088 case PACKET_MSG_SHORT:
5089 case PACKET_REPEAT:
5090 case PACKET_FENCE:
5091 case PACKET_NOP:
5092 case PACKET_ARB_POINT:
5093 parser->patched_cb_size += pkt_size;
5094 break;
5095
5096 default:
5097 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5098 pkt_id);
5099 rc = -EINVAL;
5100 break;
5101 }
5102
5103 if (rc)
5104 break;
5105 }
5106
5107 /*
5108 * The new CB should have space at the end for two MSG_PROT packets:
5109 * 1. Optional NOP padding for cacheline alignment
5110 * 2. A packet that will act as a completion packet
5111 * 3. A packet that will generate MSI interrupt
5112 */
5113 if (parser->completion)
5114 parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
5115 parser->patched_cb_size);
5116
5117 return rc;
5118 }
5119
gaudi_patch_dma_packet(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_lin_dma * user_dma_pkt,struct packet_lin_dma * new_dma_pkt,u32 * new_dma_pkt_size)5120 static int gaudi_patch_dma_packet(struct hl_device *hdev,
5121 struct hl_cs_parser *parser,
5122 struct packet_lin_dma *user_dma_pkt,
5123 struct packet_lin_dma *new_dma_pkt,
5124 u32 *new_dma_pkt_size)
5125 {
5126 struct hl_userptr *userptr;
5127 struct scatterlist *sg, *sg_next_iter;
5128 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
5129 u64 len, len_next;
5130 dma_addr_t dma_addr, dma_addr_next;
5131 u64 device_memory_addr, addr;
5132 enum dma_data_direction dir;
5133 struct sg_table *sgt;
5134 bool src_in_host = false;
5135 bool skip_host_mem_pin = false;
5136 bool user_memset;
5137
5138 ctl = le32_to_cpu(user_dma_pkt->ctl);
5139
5140 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5141 src_in_host = true;
5142
5143 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
5144 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
5145
5146 if (src_in_host) {
5147 addr = le64_to_cpu(user_dma_pkt->src_addr);
5148 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
5149 dir = DMA_TO_DEVICE;
5150 if (user_memset)
5151 skip_host_mem_pin = true;
5152 } else {
5153 addr = le64_to_cpu(user_dma_pkt->dst_addr);
5154 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
5155 dir = DMA_FROM_DEVICE;
5156 }
5157
5158 if ((!skip_host_mem_pin) &&
5159 (!hl_userptr_is_pinned(hdev, addr,
5160 le32_to_cpu(user_dma_pkt->tsize),
5161 parser->job_userptr_list, &userptr))) {
5162 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
5163 addr, user_dma_pkt->tsize);
5164 return -EFAULT;
5165 }
5166
5167 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
5168 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
5169 *new_dma_pkt_size = sizeof(*user_dma_pkt);
5170 return 0;
5171 }
5172
5173 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5174
5175 sgt = userptr->sgt;
5176 dma_desc_cnt = 0;
5177
5178 for_each_sgtable_dma_sg(sgt, sg, count) {
5179 len = sg_dma_len(sg);
5180 dma_addr = sg_dma_address(sg);
5181
5182 if (len == 0)
5183 break;
5184
5185 while ((count + 1) < sgt->nents) {
5186 sg_next_iter = sg_next(sg);
5187 len_next = sg_dma_len(sg_next_iter);
5188 dma_addr_next = sg_dma_address(sg_next_iter);
5189
5190 if (len_next == 0)
5191 break;
5192
5193 if ((dma_addr + len == dma_addr_next) &&
5194 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
5195 len += len_next;
5196 count++;
5197 sg = sg_next_iter;
5198 } else {
5199 break;
5200 }
5201 }
5202
5203 ctl = le32_to_cpu(user_dma_pkt->ctl);
5204 if (likely(dma_desc_cnt))
5205 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
5206 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
5207 new_dma_pkt->ctl = cpu_to_le32(ctl);
5208 new_dma_pkt->tsize = cpu_to_le32(len);
5209
5210 if (dir == DMA_TO_DEVICE) {
5211 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
5212 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
5213 } else {
5214 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
5215 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
5216 }
5217
5218 if (!user_memset)
5219 device_memory_addr += len;
5220 dma_desc_cnt++;
5221 new_dma_pkt++;
5222 }
5223
5224 if (!dma_desc_cnt) {
5225 dev_err(hdev->dev,
5226 "Error of 0 SG entries when patching DMA packet\n");
5227 return -EFAULT;
5228 }
5229
5230 /* Fix the last dma packet - wrcomp must be as user set it */
5231 new_dma_pkt--;
5232 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
5233
5234 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
5235
5236 return 0;
5237 }
5238
gaudi_patch_cb(struct hl_device * hdev,struct hl_cs_parser * parser)5239 static int gaudi_patch_cb(struct hl_device *hdev,
5240 struct hl_cs_parser *parser)
5241 {
5242 u32 cb_parsed_length = 0;
5243 u32 cb_patched_cur_length = 0;
5244 int rc = 0;
5245
5246 /* cb_user_size is more than 0 so loop will always be executed */
5247 while (cb_parsed_length < parser->user_cb_size) {
5248 enum packet_id pkt_id;
5249 u16 pkt_size;
5250 u32 new_pkt_size = 0;
5251 struct gaudi_packet *user_pkt, *kernel_pkt;
5252
5253 user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
5254 kernel_pkt = parser->patched_cb->kernel_address +
5255 cb_patched_cur_length;
5256
5257 pkt_id = (enum packet_id) (
5258 (le64_to_cpu(user_pkt->header) &
5259 PACKET_HEADER_PACKET_ID_MASK) >>
5260 PACKET_HEADER_PACKET_ID_SHIFT);
5261
5262 if (!validate_packet_id(pkt_id)) {
5263 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
5264 rc = -EINVAL;
5265 break;
5266 }
5267
5268 pkt_size = gaudi_packet_sizes[pkt_id];
5269 cb_parsed_length += pkt_size;
5270 if (cb_parsed_length > parser->user_cb_size) {
5271 dev_err(hdev->dev,
5272 "packet 0x%x is out of CB boundary\n", pkt_id);
5273 rc = -EINVAL;
5274 break;
5275 }
5276
5277 switch (pkt_id) {
5278 case PACKET_LIN_DMA:
5279 rc = gaudi_patch_dma_packet(hdev, parser,
5280 (struct packet_lin_dma *) user_pkt,
5281 (struct packet_lin_dma *) kernel_pkt,
5282 &new_pkt_size);
5283 cb_patched_cur_length += new_pkt_size;
5284 break;
5285
5286 case PACKET_MSG_PROT:
5287 dev_err(hdev->dev,
5288 "User not allowed to use MSG_PROT\n");
5289 rc = -EPERM;
5290 break;
5291
5292 case PACKET_CP_DMA:
5293 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
5294 rc = -EPERM;
5295 break;
5296
5297 case PACKET_STOP:
5298 dev_err(hdev->dev, "User not allowed to use STOP\n");
5299 rc = -EPERM;
5300 break;
5301
5302 case PACKET_WREG_32:
5303 case PACKET_WREG_BULK:
5304 case PACKET_MSG_LONG:
5305 case PACKET_MSG_SHORT:
5306 case PACKET_REPEAT:
5307 case PACKET_FENCE:
5308 case PACKET_NOP:
5309 case PACKET_ARB_POINT:
5310 case PACKET_LOAD_AND_EXE:
5311 memcpy(kernel_pkt, user_pkt, pkt_size);
5312 cb_patched_cur_length += pkt_size;
5313 break;
5314
5315 default:
5316 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
5317 pkt_id);
5318 rc = -EINVAL;
5319 break;
5320 }
5321
5322 if (rc)
5323 break;
5324 }
5325
5326 return rc;
5327 }
5328
gaudi_parse_cb_mmu(struct hl_device * hdev,struct hl_cs_parser * parser)5329 static int gaudi_parse_cb_mmu(struct hl_device *hdev,
5330 struct hl_cs_parser *parser)
5331 {
5332 u64 handle;
5333 u32 patched_cb_size;
5334 struct hl_cb *user_cb;
5335 int rc;
5336
5337 /*
5338 * The new CB should have space at the end for two MSG_PROT packets:
5339 * 1. Optional NOP padding for cacheline alignment
5340 * 2. A packet that will act as a completion packet
5341 * 3. A packet that will generate MSI interrupt
5342 */
5343 if (parser->completion)
5344 parser->patched_cb_size = parser->user_cb_size +
5345 gaudi_get_patched_cb_extra_size(parser->user_cb_size);
5346 else
5347 parser->patched_cb_size = parser->user_cb_size;
5348
5349 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5350 parser->patched_cb_size, false, false,
5351 &handle);
5352
5353 if (rc) {
5354 dev_err(hdev->dev,
5355 "Failed to allocate patched CB for DMA CS %d\n",
5356 rc);
5357 return rc;
5358 }
5359
5360 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5361 /* hl_cb_get should never fail */
5362 if (!parser->patched_cb) {
5363 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5364 rc = -EFAULT;
5365 goto out;
5366 }
5367
5368 /*
5369 * We are protected from overflow because the check
5370 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
5371 * in the common code. That check is done only if is_kernel_allocated_cb is true.
5372 *
5373 * There is no option to reach here without going through that check because:
5374 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
5375 * an external queue.
5376 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
5377 */
5378 memcpy(parser->patched_cb->kernel_address,
5379 parser->user_cb->kernel_address,
5380 parser->user_cb_size);
5381
5382 patched_cb_size = parser->patched_cb_size;
5383
5384 /* Validate patched CB instead of user CB */
5385 user_cb = parser->user_cb;
5386 parser->user_cb = parser->patched_cb;
5387 rc = gaudi_validate_cb(hdev, parser, true);
5388 parser->user_cb = user_cb;
5389
5390 if (rc) {
5391 hl_cb_put(parser->patched_cb);
5392 goto out;
5393 }
5394
5395 if (patched_cb_size != parser->patched_cb_size) {
5396 dev_err(hdev->dev, "user CB size mismatch\n");
5397 hl_cb_put(parser->patched_cb);
5398 rc = -EINVAL;
5399 goto out;
5400 }
5401
5402 out:
5403 /*
5404 * Always call cb destroy here because we still have 1 reference
5405 * to it by calling cb_get earlier. After the job will be completed,
5406 * cb_put will release it, but here we want to remove it from the
5407 * idr
5408 */
5409 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5410
5411 return rc;
5412 }
5413
gaudi_parse_cb_no_mmu(struct hl_device * hdev,struct hl_cs_parser * parser)5414 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
5415 struct hl_cs_parser *parser)
5416 {
5417 u64 handle;
5418 int rc;
5419
5420 rc = gaudi_validate_cb(hdev, parser, false);
5421
5422 if (rc)
5423 goto free_userptr;
5424
5425 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
5426 parser->patched_cb_size, false, false,
5427 &handle);
5428 if (rc) {
5429 dev_err(hdev->dev,
5430 "Failed to allocate patched CB for DMA CS %d\n", rc);
5431 goto free_userptr;
5432 }
5433
5434 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
5435 /* hl_cb_get should never fail here */
5436 if (!parser->patched_cb) {
5437 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
5438 rc = -EFAULT;
5439 goto out;
5440 }
5441
5442 rc = gaudi_patch_cb(hdev, parser);
5443
5444 if (rc)
5445 hl_cb_put(parser->patched_cb);
5446
5447 out:
5448 /*
5449 * Always call cb destroy here because we still have 1 reference
5450 * to it by calling cb_get earlier. After the job will be completed,
5451 * cb_put will release it, but here we want to remove it from the
5452 * idr
5453 */
5454 hl_cb_destroy(&hdev->kernel_mem_mgr, handle);
5455
5456 free_userptr:
5457 if (rc)
5458 hl_userptr_delete_list(hdev, parser->job_userptr_list);
5459 return rc;
5460 }
5461
gaudi_parse_cb_no_ext_queue(struct hl_device * hdev,struct hl_cs_parser * parser)5462 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5463 struct hl_cs_parser *parser)
5464 {
5465 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5466 struct gaudi_device *gaudi = hdev->asic_specific;
5467 u32 nic_queue_offset, nic_mask_q_id;
5468
5469 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5470 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5471 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5472 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5473
5474 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5475 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5476 return -EINVAL;
5477 }
5478 }
5479
5480 /* For internal queue jobs just check if CB address is valid */
5481 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5482 parser->user_cb_size,
5483 asic_prop->sram_user_base_address,
5484 asic_prop->sram_end_address))
5485 return 0;
5486
5487 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5488 parser->user_cb_size,
5489 asic_prop->dram_user_base_address,
5490 asic_prop->dram_end_address))
5491 return 0;
5492
5493 /* PMMU and HPMMU addresses are equal, check only one of them */
5494 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5495 parser->user_cb_size,
5496 asic_prop->pmmu.start_addr,
5497 asic_prop->pmmu.end_addr))
5498 return 0;
5499
5500 dev_err(hdev->dev,
5501 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5502 parser->user_cb, parser->user_cb_size);
5503
5504 return -EFAULT;
5505 }
5506
gaudi_cs_parser(struct hl_device * hdev,struct hl_cs_parser * parser)5507 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5508 {
5509 struct gaudi_device *gaudi = hdev->asic_specific;
5510
5511 if (parser->queue_type == QUEUE_TYPE_INT)
5512 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5513
5514 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5515 return gaudi_parse_cb_mmu(hdev, parser);
5516 else
5517 return gaudi_parse_cb_no_mmu(hdev, parser);
5518 }
5519
gaudi_add_end_of_cb_packets(struct hl_device * hdev,void * kernel_address,u32 len,u32 original_len,u64 cq_addr,u32 cq_val,u32 msi_vec,bool eb)5520 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
5521 u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
5522 u32 msi_vec, bool eb)
5523 {
5524 struct packet_msg_prot *cq_pkt;
5525 struct packet_nop *cq_padding;
5526 u64 msi_addr;
5527 u32 tmp;
5528
5529 cq_padding = kernel_address + original_len;
5530 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);
5531
5532 while ((void *)cq_padding < (void *)cq_pkt) {
5533 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
5534 cq_padding++;
5535 }
5536
5537 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5538 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5539
5540 if (eb)
5541 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5542
5543 cq_pkt->ctl = cpu_to_le32(tmp);
5544 cq_pkt->value = cpu_to_le32(cq_val);
5545 cq_pkt->addr = cpu_to_le64(cq_addr);
5546
5547 cq_pkt++;
5548
5549 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
5550 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5551 cq_pkt->ctl = cpu_to_le32(tmp);
5552 cq_pkt->value = cpu_to_le32(1);
5553 msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
5554 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
5555 }
5556
gaudi_update_eq_ci(struct hl_device * hdev,u32 val)5557 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
5558 {
5559 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
5560 }
5561
gaudi_memset_device_memory(struct hl_device * hdev,u64 addr,u32 size,u64 val)5562 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
5563 u32 size, u64 val)
5564 {
5565 struct packet_lin_dma *lin_dma_pkt;
5566 struct hl_cs_job *job;
5567 u32 cb_size, ctl, err_cause;
5568 struct hl_cb *cb;
5569 int rc;
5570
5571 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
5572 if (!cb)
5573 return -EFAULT;
5574
5575 lin_dma_pkt = cb->kernel_address;
5576 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
5577 cb_size = sizeof(*lin_dma_pkt);
5578
5579 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
5580 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
5581 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
5582 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5583 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5584
5585 lin_dma_pkt->ctl = cpu_to_le32(ctl);
5586 lin_dma_pkt->src_addr = cpu_to_le64(val);
5587 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
5588 lin_dma_pkt->tsize = cpu_to_le32(size);
5589
5590 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5591 if (!job) {
5592 dev_err(hdev->dev, "Failed to allocate a new job\n");
5593 rc = -ENOMEM;
5594 goto release_cb;
5595 }
5596
5597 /* Verify DMA is OK */
5598 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5599 if (err_cause && !hdev->init_done) {
5600 dev_dbg(hdev->dev,
5601 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5602 err_cause);
5603 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5604 }
5605
5606 job->id = 0;
5607 job->user_cb = cb;
5608 atomic_inc(&job->user_cb->cs_cnt);
5609 job->user_cb_size = cb_size;
5610 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5611 job->patched_cb = job->user_cb;
5612 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
5613
5614 hl_debugfs_add_job(hdev, job);
5615
5616 rc = gaudi_send_job_on_qman0(hdev, job);
5617 hl_debugfs_remove_job(hdev, job);
5618 kfree(job);
5619 atomic_dec(&cb->cs_cnt);
5620
5621 /* Verify DMA is OK */
5622 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
5623 if (err_cause) {
5624 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5625 rc = -EIO;
5626 if (!hdev->init_done) {
5627 dev_dbg(hdev->dev,
5628 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5629 err_cause);
5630 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
5631 }
5632 }
5633
5634 release_cb:
5635 hl_cb_put(cb);
5636 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5637
5638 return rc;
5639 }
5640
gaudi_memset_registers(struct hl_device * hdev,u64 reg_base,u32 num_regs,u32 val)5641 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
5642 u32 num_regs, u32 val)
5643 {
5644 struct packet_msg_long *pkt;
5645 struct hl_cs_job *job;
5646 u32 cb_size, ctl;
5647 struct hl_cb *cb;
5648 int i, rc;
5649
5650 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
5651
5652 if (cb_size > SZ_2M) {
5653 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
5654 return -ENOMEM;
5655 }
5656
5657 cb = hl_cb_kernel_create(hdev, cb_size, false);
5658 if (!cb)
5659 return -EFAULT;
5660
5661 pkt = cb->kernel_address;
5662
5663 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
5664 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
5665 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
5666 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
5667 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
5668
5669 for (i = 0; i < num_regs ; i++, pkt++) {
5670 pkt->ctl = cpu_to_le32(ctl);
5671 pkt->value = cpu_to_le32(val);
5672 pkt->addr = cpu_to_le64(reg_base + (i * 4));
5673 }
5674
5675 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
5676 if (!job) {
5677 dev_err(hdev->dev, "Failed to allocate a new job\n");
5678 rc = -ENOMEM;
5679 goto release_cb;
5680 }
5681
5682 job->id = 0;
5683 job->user_cb = cb;
5684 atomic_inc(&job->user_cb->cs_cnt);
5685 job->user_cb_size = cb_size;
5686 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
5687 job->patched_cb = job->user_cb;
5688 job->job_cb_size = cb_size;
5689
5690 hl_debugfs_add_job(hdev, job);
5691
5692 rc = gaudi_send_job_on_qman0(hdev, job);
5693 hl_debugfs_remove_job(hdev, job);
5694 kfree(job);
5695 atomic_dec(&cb->cs_cnt);
5696
5697 release_cb:
5698 hl_cb_put(cb);
5699 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);
5700
5701 return rc;
5702 }
5703
gaudi_restore_sm_registers(struct hl_device * hdev)5704 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5705 {
5706 u64 base_addr;
5707 u32 num_regs;
5708 int rc;
5709
5710 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5711 num_regs = NUM_OF_SOB_IN_BLOCK;
5712 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5713 if (rc) {
5714 dev_err(hdev->dev, "failed resetting SM registers");
5715 return -ENOMEM;
5716 }
5717
5718 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5719 num_regs = NUM_OF_SOB_IN_BLOCK;
5720 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5721 if (rc) {
5722 dev_err(hdev->dev, "failed resetting SM registers");
5723 return -ENOMEM;
5724 }
5725
5726 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5727 num_regs = NUM_OF_SOB_IN_BLOCK;
5728 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5729 if (rc) {
5730 dev_err(hdev->dev, "failed resetting SM registers");
5731 return -ENOMEM;
5732 }
5733
5734 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5735 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5736 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5737 if (rc) {
5738 dev_err(hdev->dev, "failed resetting SM registers");
5739 return -ENOMEM;
5740 }
5741
5742 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5743 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5744 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5745 if (rc) {
5746 dev_err(hdev->dev, "failed resetting SM registers");
5747 return -ENOMEM;
5748 }
5749
5750 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5751 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5752 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5753 if (rc) {
5754 dev_err(hdev->dev, "failed resetting SM registers");
5755 return -ENOMEM;
5756 }
5757
5758 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5759 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5760 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5761 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5762 if (rc) {
5763 dev_err(hdev->dev, "failed resetting SM registers");
5764 return -ENOMEM;
5765 }
5766
5767 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5768 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5769 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5770 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5771 if (rc) {
5772 dev_err(hdev->dev, "failed resetting SM registers");
5773 return -ENOMEM;
5774 }
5775
5776 return 0;
5777 }
5778
gaudi_restore_dma_registers(struct hl_device * hdev)5779 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5780 {
5781 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5782 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5783 int i;
5784
5785 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5786 u64 sob_addr = CFG_BASE +
5787 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5788 (i * sob_delta);
5789 u32 dma_offset = i * DMA_CORE_OFFSET;
5790
5791 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5792 lower_32_bits(sob_addr));
5793 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5794 upper_32_bits(sob_addr));
5795 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5796
5797 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5798 * modified by the user for SRAM reduction
5799 */
5800 if (i > 1)
5801 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5802 0x00000001);
5803 }
5804 }
5805
gaudi_restore_qm_registers(struct hl_device * hdev)5806 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5807 {
5808 u32 qman_offset;
5809 int i;
5810
5811 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5812 qman_offset = i * DMA_QMAN_OFFSET;
5813 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5814 }
5815
5816 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5817 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5818 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5819 }
5820
5821 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5822 qman_offset = i * TPC_QMAN_OFFSET;
5823 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5824 }
5825
5826 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5827 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5828 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5829 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5830 }
5831 }
5832
gaudi_restore_user_registers(struct hl_device * hdev)5833 static int gaudi_restore_user_registers(struct hl_device *hdev)
5834 {
5835 int rc;
5836
5837 rc = gaudi_restore_sm_registers(hdev);
5838 if (rc)
5839 return rc;
5840
5841 gaudi_restore_dma_registers(hdev);
5842 gaudi_restore_qm_registers(hdev);
5843
5844 return 0;
5845 }
5846
gaudi_context_switch(struct hl_device * hdev,u32 asid)5847 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
5848 {
5849 return 0;
5850 }
5851
gaudi_mmu_clear_pgt_range(struct hl_device * hdev)5852 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
5853 {
5854 u32 size = hdev->asic_prop.mmu_pgt_size +
5855 hdev->asic_prop.mmu_cache_mng_size;
5856 struct gaudi_device *gaudi = hdev->asic_specific;
5857 u64 addr = hdev->asic_prop.mmu_pgt_addr;
5858
5859 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5860 return 0;
5861
5862 return gaudi_memset_device_memory(hdev, addr, size, 0);
5863 }
5864
gaudi_restore_phase_topology(struct hl_device * hdev)5865 static void gaudi_restore_phase_topology(struct hl_device *hdev)
5866 {
5867
5868 }
5869
gaudi_dma_core_transfer(struct hl_device * hdev,int dma_id,u64 addr,u32 size_to_dma,dma_addr_t dma_addr)5870 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
5871 u32 size_to_dma, dma_addr_t dma_addr)
5872 {
5873 u32 err_cause, val;
5874 u64 dma_offset;
5875 int rc;
5876
5877 dma_offset = dma_id * DMA_CORE_OFFSET;
5878
5879 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
5880 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
5881 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
5882 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
5883 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
5884 WREG32(mmDMA0_CORE_COMMIT + dma_offset,
5885 (1 << DMA0_CORE_COMMIT_LIN_SHIFT));
5886
5887 rc = hl_poll_timeout(
5888 hdev,
5889 mmDMA0_CORE_STS0 + dma_offset,
5890 val,
5891 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
5892 0,
5893 1000000);
5894
5895 if (rc) {
5896 dev_err(hdev->dev,
5897 "DMA %d timed-out during reading of 0x%llx\n",
5898 dma_id, addr);
5899 return -EIO;
5900 }
5901
5902 /* Verify DMA is OK */
5903 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5904 if (err_cause) {
5905 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
5906 dev_dbg(hdev->dev,
5907 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5908 err_cause);
5909 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5910
5911 return -EIO;
5912 }
5913
5914 return 0;
5915 }
5916
gaudi_debugfs_read_dma(struct hl_device * hdev,u64 addr,u32 size,void * blob_addr)5917 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
5918 void *blob_addr)
5919 {
5920 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
5921 u32 qm_glbl_sts0, qm_cgm_sts;
5922 u64 dma_offset, qm_offset;
5923 dma_addr_t dma_addr;
5924 void *kernel_addr;
5925 bool is_eng_idle;
5926 int rc = 0, dma_id;
5927
5928 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);
5929
5930 if (!kernel_addr)
5931 return -ENOMEM;
5932
5933 hdev->asic_funcs->hw_queues_lock(hdev);
5934
5935 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
5936 dma_offset = dma_id * DMA_CORE_OFFSET;
5937 qm_offset = dma_id * DMA_QMAN_OFFSET;
5938 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5939 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5940 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5941 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5942 IS_DMA_IDLE(dma_core_sts0);
5943
5944 if (!is_eng_idle) {
5945 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
5946 dma_offset = dma_id * DMA_CORE_OFFSET;
5947 qm_offset = dma_id * DMA_QMAN_OFFSET;
5948 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
5949 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
5950 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
5951 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
5952 IS_DMA_IDLE(dma_core_sts0);
5953
5954 if (!is_eng_idle) {
5955 dev_err_ratelimited(hdev->dev,
5956 "Can't read via DMA because it is BUSY\n");
5957 rc = -EAGAIN;
5958 goto out;
5959 }
5960 }
5961
5962 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
5963 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
5964 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
5965
5966 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
5967 * using the compute ctx ASID, if exists. If not, use the kernel ctx
5968 * ASID
5969 */
5970 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5971
5972 /* Verify DMA is OK */
5973 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5974 if (err_cause) {
5975 dev_dbg(hdev->dev,
5976 "Clearing DMA0 engine from errors (cause 0x%x)\n",
5977 err_cause);
5978 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
5979 }
5980
5981 pos = 0;
5982 size_left = size;
5983 size_to_dma = SZ_2M;
5984
5985 while (size_left > 0) {
5986
5987 if (size_left < SZ_2M)
5988 size_to_dma = size_left;
5989
5990 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
5991 dma_addr);
5992 if (rc)
5993 break;
5994
5995 memcpy(blob_addr + pos, kernel_addr, size_to_dma);
5996
5997 if (size_left <= SZ_2M)
5998 break;
5999
6000 pos += SZ_2M;
6001 addr += SZ_2M;
6002 size_left -= SZ_2M;
6003 }
6004
6005 /* TODO: remove this by mapping the DMA temporary buffer to the MMU
6006 * using the compute ctx ASID, if exists. If not, use the kernel ctx
6007 * ASID
6008 */
6009 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
6010 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
6011
6012 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);
6013
6014 out:
6015 hdev->asic_funcs->hw_queues_unlock(hdev);
6016
6017 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);
6018
6019 return rc;
6020 }
6021
gaudi_read_pte(struct hl_device * hdev,u64 addr)6022 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
6023 {
6024 struct gaudi_device *gaudi = hdev->asic_specific;
6025
6026 if (hdev->reset_info.hard_reset_pending)
6027 return U64_MAX;
6028
6029 return readq(hdev->pcie_bar[HBM_BAR_ID] +
6030 (addr - gaudi->hbm_bar_cur_addr));
6031 }
6032
gaudi_write_pte(struct hl_device * hdev,u64 addr,u64 val)6033 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
6034 {
6035 struct gaudi_device *gaudi = hdev->asic_specific;
6036
6037 if (hdev->reset_info.hard_reset_pending)
6038 return;
6039
6040 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
6041 (addr - gaudi->hbm_bar_cur_addr));
6042 }
6043
gaudi_mmu_prepare_reg(struct hl_device * hdev,u64 reg,u32 asid)6044 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
6045 {
6046 /* mask to zero the MMBP and ASID bits */
6047 WREG32_AND(reg, ~0x7FF);
6048 WREG32_OR(reg, asid);
6049 }
6050
gaudi_mmu_prepare(struct hl_device * hdev,u32 asid)6051 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
6052 {
6053 struct gaudi_device *gaudi = hdev->asic_specific;
6054
6055 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6056 return;
6057
6058 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
6059 dev_crit(hdev->dev, "asid %u is too big\n", asid);
6060 return;
6061 }
6062
6063 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6064 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6065 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6066 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6067 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6068
6069 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6070 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6071 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6072 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6073 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6074
6075 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6076 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6077 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6078 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6079 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6080
6081 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6082 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6083 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6084 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6085 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6086
6087 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6088 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6089 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6090 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6091 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6092
6093 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6094 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6095 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6096 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6097 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6098
6099 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6100 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6101 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6102 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6103 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6104
6105 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6106 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6107 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6108 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6109 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6110
6111 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
6112 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
6113 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
6114 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
6115 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
6116 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
6117 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
6118 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
6119
6120 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6121 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6122 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6123 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6124 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6125 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
6126 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
6127
6128 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
6129 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
6130 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
6131 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
6132 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
6133 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
6134 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
6135
6136 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6137 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6138 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6139 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6140 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6141 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
6142 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
6143
6144 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
6145 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
6146 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
6147 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
6148 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
6149 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
6150 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
6151
6152 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
6153 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
6154 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
6155 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
6156 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
6157 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
6158 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
6159
6160 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
6161 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
6162 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
6163 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
6164 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
6165 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
6166 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
6167
6168 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
6169 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
6170 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
6171 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
6172 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
6173 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
6174 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
6175
6176 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
6177 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
6178 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
6179 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
6180 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
6181 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
6182 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
6183
6184 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
6185 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
6186 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
6187 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
6188 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
6189 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
6190 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
6191 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
6192 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
6193 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
6194
6195 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
6196 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
6197 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
6198 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
6199 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
6200 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
6201 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
6202 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
6203 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
6204 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
6205 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
6206 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
6207
6208 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
6209 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
6210 asid);
6211 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
6212 asid);
6213 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
6214 asid);
6215 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
6216 asid);
6217 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
6218 asid);
6219 }
6220
6221 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
6222 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
6223 asid);
6224 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
6225 asid);
6226 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
6227 asid);
6228 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
6229 asid);
6230 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
6231 asid);
6232 }
6233
6234 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
6235 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
6236 asid);
6237 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
6238 asid);
6239 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
6240 asid);
6241 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
6242 asid);
6243 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
6244 asid);
6245 }
6246
6247 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
6248 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
6249 asid);
6250 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
6251 asid);
6252 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
6253 asid);
6254 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
6255 asid);
6256 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
6257 asid);
6258 }
6259
6260 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
6261 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
6262 asid);
6263 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
6264 asid);
6265 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
6266 asid);
6267 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
6268 asid);
6269 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
6270 asid);
6271 }
6272
6273 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
6274 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
6275 asid);
6276 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
6277 asid);
6278 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
6279 asid);
6280 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
6281 asid);
6282 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
6283 asid);
6284 }
6285
6286 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
6287 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
6288 asid);
6289 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
6290 asid);
6291 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
6292 asid);
6293 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
6294 asid);
6295 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
6296 asid);
6297 }
6298
6299 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
6300 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
6301 asid);
6302 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
6303 asid);
6304 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
6305 asid);
6306 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
6307 asid);
6308 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
6309 asid);
6310 }
6311
6312 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
6313 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
6314 asid);
6315 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
6316 asid);
6317 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
6318 asid);
6319 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
6320 asid);
6321 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
6322 asid);
6323 }
6324
6325 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
6326 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
6327 asid);
6328 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
6329 asid);
6330 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
6331 asid);
6332 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
6333 asid);
6334 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
6335 asid);
6336 }
6337
6338 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
6339 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
6340 }
6341
gaudi_send_job_on_qman0(struct hl_device * hdev,struct hl_cs_job * job)6342 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
6343 struct hl_cs_job *job)
6344 {
6345 struct packet_msg_prot *fence_pkt;
6346 u32 *fence_ptr;
6347 dma_addr_t fence_dma_addr;
6348 struct hl_cb *cb;
6349 u32 tmp, timeout, dma_offset;
6350 int rc;
6351
6352 if (hdev->pldm)
6353 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
6354 else
6355 timeout = HL_DEVICE_TIMEOUT_USEC;
6356
6357 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
6358 if (!fence_ptr) {
6359 dev_err(hdev->dev,
6360 "Failed to allocate fence memory for QMAN0\n");
6361 return -ENOMEM;
6362 }
6363
6364 cb = job->patched_cb;
6365
6366 fence_pkt = cb->kernel_address +
6367 job->job_cb_size - sizeof(struct packet_msg_prot);
6368
6369 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
6370 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
6371 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
6372
6373 fence_pkt->ctl = cpu_to_le32(tmp);
6374 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
6375 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
6376
6377 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
6378
6379 WREG32(mmDMA0_CORE_PROT + dma_offset,
6380 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
6381
6382 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
6383 job->job_cb_size, cb->bus_address);
6384 if (rc) {
6385 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
6386 goto free_fence_ptr;
6387 }
6388
6389 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
6390 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
6391 timeout, true);
6392
6393 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
6394
6395 if (rc == -ETIMEDOUT) {
6396 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
6397 goto free_fence_ptr;
6398 }
6399
6400 free_fence_ptr:
6401 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
6402
6403 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
6404 return rc;
6405 }
6406
gaudi_get_event_desc(u16 event_type,char * desc,size_t size)6407 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
6408 {
6409 if (event_type >= GAUDI_EVENT_SIZE)
6410 goto event_not_supported;
6411
6412 if (!gaudi_irq_map_table[event_type].valid)
6413 goto event_not_supported;
6414
6415 snprintf(desc, size, gaudi_irq_map_table[event_type].name);
6416
6417 return;
6418
6419 event_not_supported:
6420 snprintf(desc, size, "N/A");
6421 }
6422
gaudi_get_razwi_initiator_dma_name(struct hl_device * hdev,u32 x_y,bool is_write,u16 * engine_id_1,u16 * engine_id_2)6423 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
6424 bool is_write, u16 *engine_id_1,
6425 u16 *engine_id_2)
6426 {
6427 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
6428
6429 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
6430 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
6431
6432 switch (x_y) {
6433 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6434 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6435 dma_id[0] = 0;
6436 dma_id[1] = 2;
6437 break;
6438 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6439 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6440 dma_id[0] = 1;
6441 dma_id[1] = 3;
6442 break;
6443 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6444 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6445 dma_id[0] = 4;
6446 dma_id[1] = 6;
6447 break;
6448 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6449 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6450 dma_id[0] = 5;
6451 dma_id[1] = 7;
6452 break;
6453 default:
6454 goto unknown_initiator;
6455 }
6456
6457 for (i = 0 ; i < 2 ; i++) {
6458 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
6459 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
6460 }
6461
6462 switch (x_y) {
6463 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6464 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6465 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6466 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6467 return "DMA0";
6468 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6469 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
6470 return "DMA2";
6471 } else {
6472 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
6473 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
6474 return "DMA0 or DMA2";
6475 }
6476 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6477 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6478 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6479 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6480 return "DMA1";
6481 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6482 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
6483 return "DMA3";
6484 } else {
6485 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
6486 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
6487 return "DMA1 or DMA3";
6488 }
6489 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6490 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6491 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6492 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6493 return "DMA4";
6494 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6495 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
6496 return "DMA6";
6497 } else {
6498 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
6499 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
6500 return "DMA4 or DMA6";
6501 }
6502 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6503 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6504 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
6505 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6506 return "DMA5";
6507 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
6508 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
6509 return "DMA7";
6510 } else {
6511 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
6512 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
6513 return "DMA5 or DMA7";
6514 }
6515 }
6516
6517 unknown_initiator:
6518 return "unknown initiator";
6519 }
6520
gaudi_get_razwi_initiator_name(struct hl_device * hdev,bool is_write,u16 * engine_id_1,u16 * engine_id_2)6521 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
6522 u16 *engine_id_1, u16 *engine_id_2)
6523 {
6524 u32 val, x_y, axi_id;
6525
6526 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
6527 RREG32(mmMMU_UP_RAZWI_READ_ID);
6528 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
6529 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
6530 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
6531 RAZWI_INITIATOR_AXI_ID_SHIFT);
6532
6533 switch (x_y) {
6534 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
6535 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6536 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
6537 return "TPC0";
6538 }
6539 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6540 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
6541 return "NIC0";
6542 }
6543 break;
6544 case RAZWI_INITIATOR_ID_X_Y_TPC1:
6545 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
6546 return "TPC1";
6547 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
6548 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
6549 *engine_id_1 = GAUDI_ENGINE_ID_MME_0;
6550 return "MME0";
6551 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
6552 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
6553 *engine_id_1 = GAUDI_ENGINE_ID_MME_1;
6554 return "MME1";
6555 case RAZWI_INITIATOR_ID_X_Y_TPC2:
6556 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
6557 return "TPC2";
6558 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
6559 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6560 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
6561 return "TPC3";
6562 }
6563 /* PCI, CPU or PSOC does not have engine id*/
6564 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
6565 return "PCI";
6566 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
6567 return "CPU";
6568 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
6569 return "PSOC";
6570 break;
6571 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
6572 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
6573 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
6574 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
6575 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
6576 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
6577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
6578 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
6579 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
6580 engine_id_1, engine_id_2);
6581 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
6582 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6583 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
6584 return "TPC4";
6585 }
6586 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6587 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
6588 return "NIC1";
6589 }
6590 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6591 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
6592 return "NIC2";
6593 }
6594 break;
6595 case RAZWI_INITIATOR_ID_X_Y_TPC5:
6596 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
6597 return "TPC5";
6598 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
6599 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
6600 *engine_id_1 = GAUDI_ENGINE_ID_MME_2;
6601 return "MME2";
6602 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
6603 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
6604 *engine_id_1 = GAUDI_ENGINE_ID_MME_3;
6605 return "MME3";
6606 case RAZWI_INITIATOR_ID_X_Y_TPC6:
6607 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
6608 return "TPC6";
6609 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
6610 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
6611 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
6612 return "TPC7";
6613 }
6614 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
6615 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
6616 return "NIC4";
6617 }
6618 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
6619 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
6620 return "NIC5";
6621 }
6622 break;
6623 default:
6624 break;
6625 }
6626
6627 dev_err(hdev->dev,
6628 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
6629 val,
6630 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
6631 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
6632 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
6633 RAZWI_INITIATOR_AXI_ID_MASK);
6634
6635 return "unknown initiator";
6636 }
6637
gaudi_print_and_get_razwi_info(struct hl_device * hdev,u16 * engine_id_1,u16 * engine_id_2,bool * is_read,bool * is_write)6638 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
6639 u16 *engine_id_2, bool *is_read, bool *is_write)
6640 {
6641
6642 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
6643 dev_err_ratelimited(hdev->dev,
6644 "RAZWI event caused by illegal write of %s\n",
6645 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
6646 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
6647 *is_write = true;
6648 }
6649
6650 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
6651 dev_err_ratelimited(hdev->dev,
6652 "RAZWI event caused by illegal read of %s\n",
6653 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
6654 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
6655 *is_read = true;
6656 }
6657 }
6658
gaudi_print_and_get_mmu_error_info(struct hl_device * hdev,u64 * addr,u64 * event_mask)6659 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
6660 {
6661 struct gaudi_device *gaudi = hdev->asic_specific;
6662 u32 val;
6663
6664 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
6665 return;
6666
6667 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
6668 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6669 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
6670 *addr <<= 32;
6671 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
6672
6673 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
6674 hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
6675
6676 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
6677 }
6678
6679 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
6680 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
6681 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
6682 *addr <<= 32;
6683 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
6684
6685 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
6686
6687 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
6688 }
6689 }
6690
6691 /*
6692 * +-------------------+------------------------------------------------------+
6693 * | Configuration Reg | Description |
6694 * | Address | |
6695 * +-------------------+------------------------------------------------------+
6696 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6697 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6698 * | |0xF34 memory wrappers 63:32 |
6699 * | |0xF38 memory wrappers 95:64 |
6700 * | |0xF3C memory wrappers 127:96 |
6701 * +-------------------+------------------------------------------------------+
6702 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6703 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6704 * | |0xF44 memory wrappers 63:32 |
6705 * | |0xF48 memory wrappers 95:64 |
6706 * | |0xF4C memory wrappers 127:96 |
6707 * +-------------------+------------------------------------------------------+
6708 */
gaudi_extract_ecc_info(struct hl_device * hdev,struct ecc_info_extract_params * params,u64 * ecc_address,u64 * ecc_syndrom,u8 * memory_wrapper_idx)6709 static int gaudi_extract_ecc_info(struct hl_device *hdev,
6710 struct ecc_info_extract_params *params, u64 *ecc_address,
6711 u64 *ecc_syndrom, u8 *memory_wrapper_idx)
6712 {
6713 u32 i, num_mem_regs, reg, err_bit;
6714 u64 err_addr, err_word = 0;
6715
6716 num_mem_regs = params->num_memories / 32 +
6717 ((params->num_memories % 32) ? 1 : 0);
6718
6719 if (params->block_address >= CFG_BASE)
6720 params->block_address -= CFG_BASE;
6721
6722 if (params->derr)
6723 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
6724 else
6725 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;
6726
6727 /* Set invalid wrapper index */
6728 *memory_wrapper_idx = 0xFF;
6729
6730 /* Iterate through memory wrappers, a single bit must be set */
6731 for (i = 0 ; i < num_mem_regs ; i++) {
6732 err_addr += i * 4;
6733 err_word = RREG32(err_addr);
6734 if (err_word) {
6735 err_bit = __ffs(err_word);
6736 *memory_wrapper_idx = err_bit + (32 * i);
6737 break;
6738 }
6739 }
6740
6741 if (*memory_wrapper_idx == 0xFF) {
6742 dev_err(hdev->dev, "ECC error information cannot be found\n");
6743 return -EINVAL;
6744 }
6745
6746 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
6747 *memory_wrapper_idx);
6748
6749 *ecc_address =
6750 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
6751 *ecc_syndrom =
6752 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);
6753
6754 /* Clear error indication */
6755 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
6756 if (params->derr)
6757 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
6758 else
6759 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);
6760
6761 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);
6762
6763 return 0;
6764 }
6765
6766 /*
6767 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6768 *
6769 * @idx: the current pi/ci value
6770 * @q_len: the queue length (power of 2)
6771 *
6772 * @return the cyclically decremented index
6773 */
gaudi_queue_idx_dec(u32 idx,u32 q_len)6774 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
6775 {
6776 u32 mask = q_len - 1;
6777
6778 /*
6779 * modular decrement is equivalent to adding (queue_size -1)
6780 * later we take LSBs to make sure the value is in the
6781 * range [0, queue_len - 1]
6782 */
6783 return (idx + q_len - 1) & mask;
6784 }
6785
6786 /**
6787 * gaudi_handle_sw_config_stream_data - print SW config stream data
6788 *
6789 * @hdev: pointer to the habanalabs device structure
6790 * @stream: the QMAN's stream
6791 * @qman_base: base address of QMAN registers block
6792 * @event_mask: mask of the last events occurred
6793 */
gaudi_handle_sw_config_stream_data(struct hl_device * hdev,u32 stream,u64 qman_base,u64 event_mask)6794 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
6795 u64 qman_base, u64 event_mask)
6796 {
6797 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
6798 u32 cq_ptr_lo_off, size;
6799
6800 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;
6801
6802 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
6803 stream * cq_ptr_lo_off;
6804 cq_ptr_hi = cq_ptr_lo +
6805 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
6806 cq_tsize = cq_ptr_lo +
6807 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);
6808
6809 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
6810 size = RREG32(cq_tsize);
6811 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
6812 stream, cq_ptr, size);
6813
6814 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6815 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
6816 hdev->captured_err_info.undef_opcode.cq_size = size;
6817 hdev->captured_err_info.undef_opcode.stream_id = stream;
6818 }
6819 }
6820
6821 /**
6822 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6823 *
6824 * @hdev: pointer to the habanalabs device structure
6825 * @qid_base: first QID of the QMAN (out of 4 streams)
6826 * @stream: the QMAN's stream
6827 * @qman_base: base address of QMAN registers block
6828 * @event_mask: mask of the last events occurred
6829 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6830 */
gaudi_handle_last_pqes_on_err(struct hl_device * hdev,u32 qid_base,u32 stream,u64 qman_base,u64 event_mask,bool pr_sw_conf)6831 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
6832 u32 stream, u64 qman_base,
6833 u64 event_mask,
6834 bool pr_sw_conf)
6835 {
6836 u32 ci, qm_ci_stream_off, queue_len;
6837 struct hl_hw_queue *q;
6838 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
6839 int i;
6840
6841 q = &hdev->kernel_queues[qid_base + stream];
6842
6843 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
6844 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
6845 stream * qm_ci_stream_off;
6846
6847 queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
6848 q->int_queue_len : HL_QUEUE_LENGTH;
6849
6850 hdev->asic_funcs->hw_queues_lock(hdev);
6851
6852 if (pr_sw_conf)
6853 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6854
6855 ci = RREG32(pq_ci);
6856
6857 /* we should start printing form ci -1 */
6858 ci = gaudi_queue_idx_dec(ci, queue_len);
6859 memset(addr, 0, sizeof(addr));
6860
6861 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
6862 struct hl_bd *bd;
6863 u32 len;
6864
6865 bd = q->kernel_address;
6866 bd += ci;
6867
6868 len = le32_to_cpu(bd->len);
6869 /* len 0 means uninitialized entry- break */
6870 if (!len)
6871 break;
6872
6873 addr[i] = le64_to_cpu(bd->ptr);
6874
6875 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
6876 stream, ci, addr[i], len);
6877
6878 /* get previous ci, wrap if needed */
6879 ci = gaudi_queue_idx_dec(ci, queue_len);
6880 }
6881
6882 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
6883 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
6884 u32 arr_idx = undef_opcode->cb_addr_streams_len;
6885
6886 if (arr_idx == 0) {
6887 undef_opcode->timestamp = ktime_get();
6888 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
6889 }
6890
6891 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
6892 undef_opcode->cb_addr_streams_len++;
6893 }
6894
6895 hdev->asic_funcs->hw_queues_unlock(hdev);
6896 }
6897
6898 /**
6899 * handle_qman_data_on_err - extract QMAN data on error
6900 *
6901 * @hdev: pointer to the habanalabs device structure
6902 * @qid_base: first QID of the QMAN (out of 4 streams)
6903 * @stream: the QMAN's stream
6904 * @qman_base: base address of QMAN registers block
6905 * @event_mask: mask of the last events occurred
6906 *
6907 * This function attempt to exatract as much data as possible on QMAN error.
6908 * On upper CP print the SW config stream data and last 8 PQEs.
6909 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6910 */
handle_qman_data_on_err(struct hl_device * hdev,u32 qid_base,u32 stream,u64 qman_base,u64 event_mask)6911 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
6912 u32 stream, u64 qman_base, u64 event_mask)
6913 {
6914 u32 i;
6915
6916 if (stream != QMAN_STREAMS) {
6917 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream,
6918 qman_base, event_mask, true);
6919 return;
6920 }
6921
6922 /* handle Lower-CP */
6923 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);
6924
6925 for (i = 0; i < QMAN_STREAMS; i++)
6926 gaudi_handle_last_pqes_on_err(hdev, qid_base, i,
6927 qman_base, event_mask, false);
6928 }
6929
gaudi_handle_qman_err_generic(struct hl_device * hdev,const char * qm_name,u64 qman_base,u32 qid_base,u64 * event_mask)6930 static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
6931 const char *qm_name,
6932 u64 qman_base,
6933 u32 qid_base,
6934 u64 *event_mask)
6935 {
6936 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
6937 u64 glbl_sts_addr, arb_err_addr;
6938 char reg_desc[32];
6939
6940 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
6941 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);
6942
6943 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
6944 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
6945 glbl_sts_clr_val = 0;
6946 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
6947
6948 if (!glbl_sts_val)
6949 continue;
6950
6951 if (i == QMAN_STREAMS)
6952 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
6953 else
6954 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
6955
6956 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
6957 if (glbl_sts_val & BIT(j)) {
6958 dev_err_ratelimited(hdev->dev,
6959 "%s %s. err cause: %s\n",
6960 qm_name, reg_desc,
6961 gaudi_qman_error_cause[j]);
6962 glbl_sts_clr_val |= BIT(j);
6963 }
6964 }
6965 /* check for undefined opcode */
6966 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
6967 hdev->captured_err_info.undef_opcode.write_enable) {
6968 memset(&hdev->captured_err_info.undef_opcode, 0,
6969 sizeof(hdev->captured_err_info.undef_opcode));
6970
6971 hdev->captured_err_info.undef_opcode.write_enable = false;
6972 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
6973 }
6974
6975 /* Write 1 clear errors */
6976 if (!hdev->stop_on_err)
6977 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
6978 else
6979 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
6980 }
6981
6982 arb_err_val = RREG32(arb_err_addr);
6983
6984 if (!arb_err_val)
6985 return;
6986
6987 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
6988 if (arb_err_val & BIT(j)) {
6989 dev_err_ratelimited(hdev->dev,
6990 "%s ARB_ERR. err cause: %s\n",
6991 qm_name,
6992 gaudi_qman_arb_error_cause[j]);
6993 }
6994 }
6995 }
6996
gaudi_print_sm_sei_info(struct hl_device * hdev,u16 event_type,struct hl_eq_sm_sei_data * sei_data)6997 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
6998 struct hl_eq_sm_sei_data *sei_data)
6999 {
7000 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
7001
7002 /* Flip the bits as the enum is ordered in the opposite way */
7003 index = (index ^ 0x3) & 0x3;
7004
7005 switch (sei_data->sei_cause) {
7006 case SM_SEI_SO_OVERFLOW:
7007 dev_err_ratelimited(hdev->dev,
7008 "%s SEI Error: SOB Group %u overflow/underflow",
7009 gaudi_sync_manager_names[index],
7010 le32_to_cpu(sei_data->sei_log));
7011 break;
7012 case SM_SEI_LBW_4B_UNALIGNED:
7013 dev_err_ratelimited(hdev->dev,
7014 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
7015 gaudi_sync_manager_names[index],
7016 le32_to_cpu(sei_data->sei_log));
7017 break;
7018 case SM_SEI_AXI_RESPONSE_ERR:
7019 dev_err_ratelimited(hdev->dev,
7020 "%s SEI Error: AXI ID %u response error",
7021 gaudi_sync_manager_names[index],
7022 le32_to_cpu(sei_data->sei_log));
7023 break;
7024 default:
7025 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
7026 le32_to_cpu(sei_data->sei_log));
7027 break;
7028 }
7029 }
7030
gaudi_handle_ecc_event(struct hl_device * hdev,u16 event_type,struct hl_eq_ecc_data * ecc_data)7031 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7032 struct hl_eq_ecc_data *ecc_data)
7033 {
7034 struct ecc_info_extract_params params;
7035 u64 ecc_address = 0, ecc_syndrom = 0;
7036 u8 index, memory_wrapper_idx = 0;
7037 bool extract_info_from_fw;
7038 int rc;
7039
7040 if (hdev->asic_prop.fw_security_enabled) {
7041 extract_info_from_fw = true;
7042 goto extract_ecc_info;
7043 }
7044
7045 switch (event_type) {
7046 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
7047 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
7048 extract_info_from_fw = true;
7049 break;
7050 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7051 index = event_type - GAUDI_EVENT_TPC0_SERR;
7052 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7053 params.num_memories = 90;
7054 params.derr = false;
7055 extract_info_from_fw = false;
7056 break;
7057 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7058 index = event_type - GAUDI_EVENT_TPC0_DERR;
7059 params.block_address =
7060 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
7061 params.num_memories = 90;
7062 params.derr = true;
7063 extract_info_from_fw = false;
7064 break;
7065 case GAUDI_EVENT_MME0_ACC_SERR:
7066 case GAUDI_EVENT_MME1_ACC_SERR:
7067 case GAUDI_EVENT_MME2_ACC_SERR:
7068 case GAUDI_EVENT_MME3_ACC_SERR:
7069 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
7070 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7071 params.num_memories = 128;
7072 params.derr = false;
7073 extract_info_from_fw = false;
7074 break;
7075 case GAUDI_EVENT_MME0_ACC_DERR:
7076 case GAUDI_EVENT_MME1_ACC_DERR:
7077 case GAUDI_EVENT_MME2_ACC_DERR:
7078 case GAUDI_EVENT_MME3_ACC_DERR:
7079 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
7080 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
7081 params.num_memories = 128;
7082 params.derr = true;
7083 extract_info_from_fw = false;
7084 break;
7085 case GAUDI_EVENT_MME0_SBAB_SERR:
7086 case GAUDI_EVENT_MME1_SBAB_SERR:
7087 case GAUDI_EVENT_MME2_SBAB_SERR:
7088 case GAUDI_EVENT_MME3_SBAB_SERR:
7089 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
7090 params.block_address =
7091 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7092 params.num_memories = 33;
7093 params.derr = false;
7094 extract_info_from_fw = false;
7095 break;
7096 case GAUDI_EVENT_MME0_SBAB_DERR:
7097 case GAUDI_EVENT_MME1_SBAB_DERR:
7098 case GAUDI_EVENT_MME2_SBAB_DERR:
7099 case GAUDI_EVENT_MME3_SBAB_DERR:
7100 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
7101 params.block_address =
7102 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
7103 params.num_memories = 33;
7104 params.derr = true;
7105 extract_info_from_fw = false;
7106 break;
7107 default:
7108 return;
7109 }
7110
7111 extract_ecc_info:
7112 if (extract_info_from_fw) {
7113 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7114 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
7115 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7116 } else {
7117 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address,
7118 &ecc_syndrom, &memory_wrapper_idx);
7119 if (rc)
7120 return;
7121 }
7122
7123 dev_err(hdev->dev,
7124 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
7125 ecc_address, ecc_syndrom, memory_wrapper_idx);
7126 }
7127
gaudi_handle_qman_err(struct hl_device * hdev,u16 event_type,u64 * event_mask)7128 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7129 {
7130 u64 qman_base;
7131 char desc[32];
7132 u32 qid_base;
7133 u8 index;
7134
7135 switch (event_type) {
7136 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7137 index = event_type - GAUDI_EVENT_TPC0_QM;
7138 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
7139 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
7140 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
7141 break;
7142 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7143 if (event_type == GAUDI_EVENT_MME0_QM) {
7144 index = 0;
7145 qid_base = GAUDI_QUEUE_ID_MME_0_0;
7146 } else { /* event_type == GAUDI_EVENT_MME2_QM */
7147 index = 2;
7148 qid_base = GAUDI_QUEUE_ID_MME_1_0;
7149 }
7150 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
7151 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
7152 break;
7153 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7154 index = event_type - GAUDI_EVENT_DMA0_QM;
7155 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
7156 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
7157 if (index > 1)
7158 qid_base++;
7159 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
7160 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
7161 break;
7162 case GAUDI_EVENT_NIC0_QM0:
7163 qid_base = GAUDI_QUEUE_ID_NIC_0_0;
7164 qman_base = mmNIC0_QM0_BASE;
7165 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
7166 break;
7167 case GAUDI_EVENT_NIC0_QM1:
7168 qid_base = GAUDI_QUEUE_ID_NIC_1_0;
7169 qman_base = mmNIC0_QM1_BASE;
7170 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
7171 break;
7172 case GAUDI_EVENT_NIC1_QM0:
7173 qid_base = GAUDI_QUEUE_ID_NIC_2_0;
7174 qman_base = mmNIC1_QM0_BASE;
7175 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
7176 break;
7177 case GAUDI_EVENT_NIC1_QM1:
7178 qid_base = GAUDI_QUEUE_ID_NIC_3_0;
7179 qman_base = mmNIC1_QM1_BASE;
7180 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
7181 break;
7182 case GAUDI_EVENT_NIC2_QM0:
7183 qid_base = GAUDI_QUEUE_ID_NIC_4_0;
7184 qman_base = mmNIC2_QM0_BASE;
7185 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
7186 break;
7187 case GAUDI_EVENT_NIC2_QM1:
7188 qid_base = GAUDI_QUEUE_ID_NIC_5_0;
7189 qman_base = mmNIC2_QM1_BASE;
7190 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
7191 break;
7192 case GAUDI_EVENT_NIC3_QM0:
7193 qid_base = GAUDI_QUEUE_ID_NIC_6_0;
7194 qman_base = mmNIC3_QM0_BASE;
7195 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
7196 break;
7197 case GAUDI_EVENT_NIC3_QM1:
7198 qid_base = GAUDI_QUEUE_ID_NIC_7_0;
7199 qman_base = mmNIC3_QM1_BASE;
7200 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
7201 break;
7202 case GAUDI_EVENT_NIC4_QM0:
7203 qid_base = GAUDI_QUEUE_ID_NIC_8_0;
7204 qman_base = mmNIC4_QM0_BASE;
7205 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
7206 break;
7207 case GAUDI_EVENT_NIC4_QM1:
7208 qid_base = GAUDI_QUEUE_ID_NIC_9_0;
7209 qman_base = mmNIC4_QM1_BASE;
7210 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
7211 break;
7212 default:
7213 return;
7214 }
7215
7216 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
7217 }
7218
gaudi_print_irq_info(struct hl_device * hdev,u16 event_type,bool check_razwi,u64 * event_mask)7219 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
7220 bool check_razwi, u64 *event_mask)
7221 {
7222 bool is_read = false, is_write = false;
7223 u16 engine_id[2], num_of_razwi_eng = 0;
7224 char desc[64] = "";
7225 u64 razwi_addr = 0;
7226 u8 razwi_flags = 0;
7227
7228 /*
7229 * Init engine id by default as not valid and only if razwi initiated from engine with
7230 * engine id it will get valid value.
7231 */
7232 engine_id[0] = HL_RAZWI_NA_ENG_ID;
7233 engine_id[1] = HL_RAZWI_NA_ENG_ID;
7234
7235 gaudi_get_event_desc(event_type, desc, sizeof(desc));
7236 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7237 event_type, desc);
7238
7239 if (check_razwi) {
7240 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
7241 &is_write);
7242 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
7243
7244 if (is_read)
7245 razwi_flags |= HL_RAZWI_READ;
7246 if (is_write)
7247 razwi_flags |= HL_RAZWI_WRITE;
7248
7249 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
7250 if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
7251 num_of_razwi_eng = 2;
7252 else
7253 num_of_razwi_eng = 1;
7254 }
7255
7256 if (razwi_flags)
7257 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
7258 razwi_flags, event_mask);
7259 }
7260 }
7261
gaudi_print_out_of_sync_info(struct hl_device * hdev,struct cpucp_pkt_sync_err * sync_err)7262 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7263 struct cpucp_pkt_sync_err *sync_err)
7264 {
7265 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7266
7267 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7268 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7269 }
7270
gaudi_print_fw_alive_info(struct hl_device * hdev,struct hl_eq_fw_alive * fw_alive)7271 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7272 struct hl_eq_fw_alive *fw_alive)
7273 {
7274 dev_err(hdev->dev,
7275 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7276 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7277 le32_to_cpu(fw_alive->process_id),
7278 le32_to_cpu(fw_alive->thread_id),
7279 le64_to_cpu(fw_alive->uptime_seconds));
7280 }
7281
gaudi_print_nic_axi_irq_info(struct hl_device * hdev,u16 event_type,void * data)7282 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
7283 void *data)
7284 {
7285 char desc[64] = "", *type;
7286 struct eq_nic_sei_event *eq_nic_sei = data;
7287 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
7288
7289 switch (eq_nic_sei->axi_error_cause) {
7290 case RXB:
7291 type = "RXB";
7292 break;
7293 case RXE:
7294 type = "RXE";
7295 break;
7296 case TXS:
7297 type = "TXS";
7298 break;
7299 case TXE:
7300 type = "TXE";
7301 break;
7302 case QPC_RESP:
7303 type = "QPC_RESP";
7304 break;
7305 case NON_AXI_ERR:
7306 type = "NON_AXI_ERR";
7307 break;
7308 case TMR:
7309 type = "TMR";
7310 break;
7311 default:
7312 dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
7313 eq_nic_sei->axi_error_cause);
7314 type = "N/A";
7315 break;
7316 }
7317
7318 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type,
7319 eq_nic_sei->id);
7320 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
7321 event_type, desc);
7322 }
7323
gaudi_compute_reset_late_init(struct hl_device * hdev)7324 static int gaudi_compute_reset_late_init(struct hl_device *hdev)
7325 {
7326 /* GAUDI doesn't support any reset except hard-reset */
7327 return -EPERM;
7328 }
7329
gaudi_hbm_read_interrupts(struct hl_device * hdev,int device,struct hl_eq_hbm_ecc_data * hbm_ecc_data)7330 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7331 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7332 {
7333 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7334 int rc = 0;
7335
7336 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7337 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7338 if (!hbm_ecc_data) {
7339 dev_err(hdev->dev, "No FW ECC data");
7340 return 0;
7341 }
7342
7343 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7344 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7345 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7346 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7347 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7348 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7349 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7350 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7351 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7352 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7353 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7354 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7355 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7356 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7357
7358 dev_err(hdev->dev,
7359 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7360 device, ch, wr_par, rd_par, ca_par, serr, derr);
7361 dev_err(hdev->dev,
7362 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7363 device, ch, hbm_ecc_data->first_addr, type,
7364 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7365 hbm_ecc_data->dec_cnt);
7366 return 0;
7367 }
7368
7369 if (hdev->asic_prop.fw_security_enabled) {
7370 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7371 return 0;
7372 }
7373
7374 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7375 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7376 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7377 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7378 if (val) {
7379 rc = -EIO;
7380 dev_err(hdev->dev,
7381 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7382 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7383 (val >> 2) & 0x1, (val >> 3) & 0x1,
7384 (val >> 4) & 0x1);
7385
7386 val2 = RREG32(base + ch * 0x1000 + 0x060);
7387 dev_err(hdev->dev,
7388 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7389 device, ch * 2,
7390 RREG32(base + ch * 0x1000 + 0x064),
7391 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7392 (val2 & 0xFF0000) >> 16,
7393 (val2 & 0xFF000000) >> 24);
7394 }
7395
7396 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7397 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7398 if (val) {
7399 rc = -EIO;
7400 dev_err(hdev->dev,
7401 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7402 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7403 (val >> 2) & 0x1, (val >> 3) & 0x1,
7404 (val >> 4) & 0x1);
7405
7406 val2 = RREG32(base + ch * 0x1000 + 0x070);
7407 dev_err(hdev->dev,
7408 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7409 device, ch * 2 + 1,
7410 RREG32(base + ch * 0x1000 + 0x074),
7411 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7412 (val2 & 0xFF0000) >> 16,
7413 (val2 & 0xFF000000) >> 24);
7414 }
7415
7416 /* Clear interrupts */
7417 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7418 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7419 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7420 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7421 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7422 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7423 }
7424
7425 val = RREG32(base + 0x8F30);
7426 val2 = RREG32(base + 0x8F34);
7427 if (val | val2) {
7428 rc = -EIO;
7429 dev_err(hdev->dev,
7430 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7431 device, val, val2);
7432 }
7433 val = RREG32(base + 0x8F40);
7434 val2 = RREG32(base + 0x8F44);
7435 if (val | val2) {
7436 rc = -EIO;
7437 dev_err(hdev->dev,
7438 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7439 device, val, val2);
7440 }
7441
7442 return rc;
7443 }
7444
gaudi_hbm_event_to_dev(u16 hbm_event_type)7445 static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
7446 {
7447 switch (hbm_event_type) {
7448 case GAUDI_EVENT_HBM0_SPI_0:
7449 case GAUDI_EVENT_HBM0_SPI_1:
7450 return 0;
7451 case GAUDI_EVENT_HBM1_SPI_0:
7452 case GAUDI_EVENT_HBM1_SPI_1:
7453 return 1;
7454 case GAUDI_EVENT_HBM2_SPI_0:
7455 case GAUDI_EVENT_HBM2_SPI_1:
7456 return 2;
7457 case GAUDI_EVENT_HBM3_SPI_0:
7458 case GAUDI_EVENT_HBM3_SPI_1:
7459 return 3;
7460 default:
7461 break;
7462 }
7463
7464 /* Should never happen */
7465 return 0;
7466 }
7467
gaudi_tpc_read_interrupts(struct hl_device * hdev,u8 tpc_id,char * interrupt_name)7468 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7469 char *interrupt_name)
7470 {
7471 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7472 bool soft_reset_required = false;
7473
7474 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7475 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7476
7477 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7478 if (tpc_interrupts_cause & BIT(i)) {
7479 dev_err_ratelimited(hdev->dev,
7480 "TPC%d_%s interrupt cause: %s\n",
7481 tpc_id, interrupt_name,
7482 gaudi_tpc_interrupts_cause[i]);
7483 /* If this is QM error, we need to soft-reset */
7484 if (i == 15)
7485 soft_reset_required = true;
7486 }
7487
7488 /* Clear interrupts */
7489 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7490
7491 return soft_reset_required;
7492 }
7493
tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)7494 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
7495 {
7496 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
7497 }
7498
tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)7499 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
7500 {
7501 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
7502 }
7503
gaudi_print_clk_change_info(struct hl_device * hdev,u16 event_type,u64 * event_mask)7504 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
7505 {
7506 ktime_t zero_time = ktime_set(0, 0);
7507
7508 mutex_lock(&hdev->clk_throttling.lock);
7509
7510 switch (event_type) {
7511 case GAUDI_EVENT_FIX_POWER_ENV_S:
7512 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
7513 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
7514 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
7515 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
7516 dev_info_ratelimited(hdev->dev,
7517 "Clock throttling due to power consumption\n");
7518 break;
7519
7520 case GAUDI_EVENT_FIX_POWER_ENV_E:
7521 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
7522 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
7523 dev_info_ratelimited(hdev->dev,
7524 "Power envelop is safe, back to optimal clock\n");
7525 break;
7526
7527 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
7528 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
7529 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
7530 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
7531 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
7532 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7533 dev_info_ratelimited(hdev->dev,
7534 "Clock throttling due to overheating\n");
7535 break;
7536
7537 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
7538 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
7539 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
7540 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7541 dev_info_ratelimited(hdev->dev,
7542 "Thermal envelop is safe, back to optimal clock\n");
7543 break;
7544
7545 default:
7546 dev_err(hdev->dev, "Received invalid clock change event %d\n",
7547 event_type);
7548 break;
7549 }
7550
7551 mutex_unlock(&hdev->clk_throttling.lock);
7552 }
7553
gaudi_handle_eqe(struct hl_device * hdev,struct hl_eq_entry * eq_entry)7554 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
7555 {
7556 struct gaudi_device *gaudi = hdev->asic_specific;
7557 struct hl_info_fw_err_info fw_err_info;
7558 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
7559 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
7560 u32 fw_fatal_err_flag = 0, flags = 0;
7561 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
7562 >> EQ_CTL_EVENT_TYPE_SHIFT);
7563 bool reset_required, reset_direct = false;
7564 u8 cause;
7565 int rc;
7566
7567 if (event_type >= GAUDI_EVENT_SIZE) {
7568 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
7569 event_type, GAUDI_EVENT_SIZE - 1);
7570 return;
7571 }
7572
7573 gaudi->events_stat[event_type]++;
7574 gaudi->events_stat_aggregate[event_type]++;
7575
7576 switch (event_type) {
7577 case GAUDI_EVENT_PCIE_CORE_DERR:
7578 case GAUDI_EVENT_PCIE_IF_DERR:
7579 case GAUDI_EVENT_PCIE_PHY_DERR:
7580 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
7581 case GAUDI_EVENT_MME0_ACC_DERR:
7582 case GAUDI_EVENT_MME0_SBAB_DERR:
7583 case GAUDI_EVENT_MME1_ACC_DERR:
7584 case GAUDI_EVENT_MME1_SBAB_DERR:
7585 case GAUDI_EVENT_MME2_ACC_DERR:
7586 case GAUDI_EVENT_MME2_SBAB_DERR:
7587 case GAUDI_EVENT_MME3_ACC_DERR:
7588 case GAUDI_EVENT_MME3_SBAB_DERR:
7589 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
7590 fallthrough;
7591 case GAUDI_EVENT_CPU_IF_ECC_DERR:
7592 case GAUDI_EVENT_PSOC_MEM_DERR:
7593 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
7594 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
7595 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
7596 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
7597 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
7598 case GAUDI_EVENT_MMU_DERR:
7599 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
7600 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7601 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7602 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7603 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7604 goto reset_device;
7605
7606 case GAUDI_EVENT_GIC500:
7607 case GAUDI_EVENT_AXI_ECC:
7608 case GAUDI_EVENT_L2_RAM_ECC:
7609 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
7610 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7611 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7612 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7613 goto reset_device;
7614
7615 case GAUDI_EVENT_HBM0_SPI_0:
7616 case GAUDI_EVENT_HBM1_SPI_0:
7617 case GAUDI_EVENT_HBM2_SPI_0:
7618 case GAUDI_EVENT_HBM3_SPI_0:
7619 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7620 gaudi_hbm_read_interrupts(hdev,
7621 gaudi_hbm_event_to_dev(event_type),
7622 &eq_entry->hbm_ecc_data);
7623 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
7624 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7625 goto reset_device;
7626
7627 case GAUDI_EVENT_HBM0_SPI_1:
7628 case GAUDI_EVENT_HBM1_SPI_1:
7629 case GAUDI_EVENT_HBM2_SPI_1:
7630 case GAUDI_EVENT_HBM3_SPI_1:
7631 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7632 gaudi_hbm_read_interrupts(hdev,
7633 gaudi_hbm_event_to_dev(event_type),
7634 &eq_entry->hbm_ecc_data);
7635 hl_fw_unmask_irq(hdev, event_type);
7636 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7637 break;
7638
7639 case GAUDI_EVENT_TPC0_DEC:
7640 case GAUDI_EVENT_TPC1_DEC:
7641 case GAUDI_EVENT_TPC2_DEC:
7642 case GAUDI_EVENT_TPC3_DEC:
7643 case GAUDI_EVENT_TPC4_DEC:
7644 case GAUDI_EVENT_TPC5_DEC:
7645 case GAUDI_EVENT_TPC6_DEC:
7646 case GAUDI_EVENT_TPC7_DEC:
7647 /* In TPC DEC event, notify on TPC assertion. While there isn't
7648 * a specific event for assertion yet, the FW generates TPC DEC event.
7649 * The SW upper layer will inspect an internal mapped area to indicate
7650 * if the event is a TPC Assertion or a "real" TPC DEC.
7651 */
7652 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
7653 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7654 reset_required = gaudi_tpc_read_interrupts(hdev,
7655 tpc_dec_event_to_tpc_id(event_type),
7656 "AXI_SLV_DEC_Error");
7657 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7658 if (reset_required) {
7659 dev_err(hdev->dev, "reset required due to %s\n",
7660 gaudi_irq_map_table[event_type].name);
7661
7662 reset_direct = true;
7663 goto reset_device;
7664 } else {
7665 hl_fw_unmask_irq(hdev, event_type);
7666 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7667 }
7668 break;
7669
7670 case GAUDI_EVENT_TPC0_KRN_ERR:
7671 case GAUDI_EVENT_TPC1_KRN_ERR:
7672 case GAUDI_EVENT_TPC2_KRN_ERR:
7673 case GAUDI_EVENT_TPC3_KRN_ERR:
7674 case GAUDI_EVENT_TPC4_KRN_ERR:
7675 case GAUDI_EVENT_TPC5_KRN_ERR:
7676 case GAUDI_EVENT_TPC6_KRN_ERR:
7677 case GAUDI_EVENT_TPC7_KRN_ERR:
7678 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7679 reset_required = gaudi_tpc_read_interrupts(hdev,
7680 tpc_krn_event_to_tpc_id(event_type),
7681 "KRN_ERR");
7682 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7683 if (reset_required) {
7684 dev_err(hdev->dev, "reset required due to %s\n",
7685 gaudi_irq_map_table[event_type].name);
7686
7687 reset_direct = true;
7688 goto reset_device;
7689 } else {
7690 hl_fw_unmask_irq(hdev, event_type);
7691 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7692 }
7693 break;
7694
7695 case GAUDI_EVENT_PCIE_CORE_SERR:
7696 case GAUDI_EVENT_PCIE_IF_SERR:
7697 case GAUDI_EVENT_PCIE_PHY_SERR:
7698 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
7699 case GAUDI_EVENT_MME0_ACC_SERR:
7700 case GAUDI_EVENT_MME0_SBAB_SERR:
7701 case GAUDI_EVENT_MME1_ACC_SERR:
7702 case GAUDI_EVENT_MME1_SBAB_SERR:
7703 case GAUDI_EVENT_MME2_ACC_SERR:
7704 case GAUDI_EVENT_MME2_SBAB_SERR:
7705 case GAUDI_EVENT_MME3_ACC_SERR:
7706 case GAUDI_EVENT_MME3_SBAB_SERR:
7707 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
7708 case GAUDI_EVENT_CPU_IF_ECC_SERR:
7709 case GAUDI_EVENT_PSOC_MEM_SERR:
7710 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
7711 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
7712 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
7713 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
7714 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
7715 fallthrough;
7716 case GAUDI_EVENT_MMU_SERR:
7717 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7718 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
7719 hl_fw_unmask_irq(hdev, event_type);
7720 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7721 break;
7722
7723 case GAUDI_EVENT_PCIE_DEC:
7724 case GAUDI_EVENT_CPU_AXI_SPLITTER:
7725 case GAUDI_EVENT_PSOC_AXI_DEC:
7726 case GAUDI_EVENT_PSOC_PRSTN_FALL:
7727 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7728 hl_fw_unmask_irq(hdev, event_type);
7729 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7730 break;
7731
7732 case GAUDI_EVENT_MMU_PAGE_FAULT:
7733 case GAUDI_EVENT_MMU_WR_PERM:
7734 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7735 hl_fw_unmask_irq(hdev, event_type);
7736 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7737 break;
7738
7739 case GAUDI_EVENT_MME0_WBC_RSP:
7740 case GAUDI_EVENT_MME0_SBAB0_RSP:
7741 case GAUDI_EVENT_MME1_WBC_RSP:
7742 case GAUDI_EVENT_MME1_SBAB0_RSP:
7743 case GAUDI_EVENT_MME2_WBC_RSP:
7744 case GAUDI_EVENT_MME2_SBAB0_RSP:
7745 case GAUDI_EVENT_MME3_WBC_RSP:
7746 case GAUDI_EVENT_MME3_SBAB0_RSP:
7747 case GAUDI_EVENT_RAZWI_OR_ADC:
7748 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
7749 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
7750 fallthrough;
7751 case GAUDI_EVENT_NIC0_QM0:
7752 case GAUDI_EVENT_NIC0_QM1:
7753 case GAUDI_EVENT_NIC1_QM0:
7754 case GAUDI_EVENT_NIC1_QM1:
7755 case GAUDI_EVENT_NIC2_QM0:
7756 case GAUDI_EVENT_NIC2_QM1:
7757 case GAUDI_EVENT_NIC3_QM0:
7758 case GAUDI_EVENT_NIC3_QM1:
7759 case GAUDI_EVENT_NIC4_QM0:
7760 case GAUDI_EVENT_NIC4_QM1:
7761 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
7762 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
7763 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7764 gaudi_handle_qman_err(hdev, event_type, &event_mask);
7765 hl_fw_unmask_irq(hdev, event_type);
7766 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
7767 break;
7768
7769 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
7770 gaudi_print_irq_info(hdev, event_type, true, &event_mask);
7771 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7772 goto reset_device;
7773
7774 case GAUDI_EVENT_TPC0_BMON_SPMU:
7775 case GAUDI_EVENT_TPC1_BMON_SPMU:
7776 case GAUDI_EVENT_TPC2_BMON_SPMU:
7777 case GAUDI_EVENT_TPC3_BMON_SPMU:
7778 case GAUDI_EVENT_TPC4_BMON_SPMU:
7779 case GAUDI_EVENT_TPC5_BMON_SPMU:
7780 case GAUDI_EVENT_TPC6_BMON_SPMU:
7781 case GAUDI_EVENT_TPC7_BMON_SPMU:
7782 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
7783 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7784 hl_fw_unmask_irq(hdev, event_type);
7785 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7786 break;
7787
7788 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
7789 gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
7790 hl_fw_unmask_irq(hdev, event_type);
7791 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7792 break;
7793
7794 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
7795 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7796 gaudi_print_sm_sei_info(hdev, event_type,
7797 &eq_entry->sm_sei_data);
7798 rc = hl_state_dump(hdev);
7799 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7800 if (rc)
7801 dev_err(hdev->dev,
7802 "Error during system state dump %d\n", rc);
7803 hl_fw_unmask_irq(hdev, event_type);
7804 break;
7805
7806 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
7807 break;
7808
7809 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
7810 gaudi_print_clk_change_info(hdev, event_type, &event_mask);
7811 hl_fw_unmask_irq(hdev, event_type);
7812 break;
7813
7814 case GAUDI_EVENT_PSOC_GPIO_U16_0:
7815 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
7816 dev_err(hdev->dev,
7817 "Received high temp H/W interrupt %d (cause %d)\n",
7818 event_type, cause);
7819 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
7820 break;
7821
7822 case GAUDI_EVENT_DEV_RESET_REQ:
7823 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7824 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7825 goto reset_device;
7826
7827 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
7828 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7829 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
7830 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
7831 goto reset_device;
7832
7833 case GAUDI_EVENT_FW_ALIVE_S:
7834 gaudi_print_irq_info(hdev, event_type, false, &event_mask);
7835 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
7836 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
7837 fw_err_info.event_id = event_type;
7838 fw_err_info.event_mask = &event_mask;
7839 hl_handle_fw_err(hdev, &fw_err_info);
7840 goto reset_device;
7841
7842 default:
7843 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
7844 event_type);
7845 break;
7846 }
7847
7848 if (event_mask)
7849 hl_notifier_event_send_all(hdev, event_mask);
7850
7851 return;
7852
7853 reset_device:
7854 reset_required = true;
7855
7856 if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
7857 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;
7858
7859 /* notify on device unavailable while the reset triggered by fw */
7860 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
7861 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
7862 } else if (hdev->hard_reset_on_fw_events) {
7863 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
7864 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
7865 } else {
7866 reset_required = false;
7867 }
7868
7869 if (reset_required) {
7870 /* escalate general hw errors to critical/fatal error */
7871 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
7872 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
7873
7874 hl_device_cond_reset(hdev, flags, event_mask);
7875 } else {
7876 hl_fw_unmask_irq(hdev, event_type);
7877 /* Notification on occurred event needs to be sent although reset is not executed */
7878 if (event_mask)
7879 hl_notifier_event_send_all(hdev, event_mask);
7880 }
7881 }
7882
gaudi_get_events_stat(struct hl_device * hdev,bool aggregate,u32 * size)7883 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7884 {
7885 struct gaudi_device *gaudi = hdev->asic_specific;
7886
7887 if (aggregate) {
7888 *size = (u32) sizeof(gaudi->events_stat_aggregate);
7889 return gaudi->events_stat_aggregate;
7890 }
7891
7892 *size = (u32) sizeof(gaudi->events_stat);
7893 return gaudi->events_stat;
7894 }
7895
gaudi_mmu_invalidate_cache(struct hl_device * hdev,bool is_hard,u32 flags)7896 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
7897 {
7898 struct gaudi_device *gaudi = hdev->asic_specific;
7899 u32 status, timeout_usec;
7900 int rc;
7901
7902 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
7903 hdev->reset_info.hard_reset_pending)
7904 return 0;
7905
7906 if (hdev->pldm)
7907 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7908 else
7909 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7910
7911 /* L0 & L1 invalidation */
7912 WREG32(mmSTLB_INV_PS, 3);
7913 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
7914 WREG32(mmSTLB_INV_PS, 2);
7915
7916 rc = hl_poll_timeout(
7917 hdev,
7918 mmSTLB_INV_PS,
7919 status,
7920 !status,
7921 1000,
7922 timeout_usec);
7923
7924 WREG32(mmSTLB_INV_SET, 0);
7925
7926 return rc;
7927 }
7928
gaudi_mmu_invalidate_cache_range(struct hl_device * hdev,bool is_hard,u32 flags,u32 asid,u64 va,u64 size)7929 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
7930 bool is_hard, u32 flags,
7931 u32 asid, u64 va, u64 size)
7932 {
7933 /* Treat as invalidate all because there is no range invalidation
7934 * in Gaudi
7935 */
7936 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
7937 }
7938
gaudi_mmu_update_asid_hop0_addr(struct hl_device * hdev,u32 asid,u64 phys_addr)7939 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
7940 {
7941 u32 status, timeout_usec;
7942 int rc;
7943
7944 if (hdev->pldm)
7945 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
7946 else
7947 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
7948
7949 WREG32(MMU_ASID, asid);
7950 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
7951 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
7952 WREG32(MMU_BUSY, 0x80000000);
7953
7954 rc = hl_poll_timeout(
7955 hdev,
7956 MMU_BUSY,
7957 status,
7958 !(status & 0x80000000),
7959 1000,
7960 timeout_usec);
7961
7962 if (rc) {
7963 dev_err(hdev->dev,
7964 "Timeout during MMU hop0 config of asid %d\n", asid);
7965 return rc;
7966 }
7967
7968 return 0;
7969 }
7970
gaudi_send_heartbeat(struct hl_device * hdev)7971 static int gaudi_send_heartbeat(struct hl_device *hdev)
7972 {
7973 struct gaudi_device *gaudi = hdev->asic_specific;
7974
7975 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7976 return 0;
7977
7978 return hl_fw_send_heartbeat(hdev);
7979 }
7980
gaudi_cpucp_info_get(struct hl_device * hdev)7981 static int gaudi_cpucp_info_get(struct hl_device *hdev)
7982 {
7983 struct gaudi_device *gaudi = hdev->asic_specific;
7984 struct asic_fixed_properties *prop = &hdev->asic_prop;
7985 int rc;
7986
7987 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7988 return 0;
7989
7990 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
7991 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
7992 mmCPU_BOOT_ERR1);
7993 if (rc)
7994 return rc;
7995
7996 if (!strlen(prop->cpucp_info.card_name))
7997 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
7998 CARD_NAME_MAX_LEN);
7999
8000 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8001
8002 set_default_power_values(hdev);
8003
8004 return 0;
8005 }
8006
gaudi_is_device_idle(struct hl_device * hdev,u64 * mask_arr,u8 mask_len,struct engines_data * e)8007 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
8008 struct engines_data *e)
8009 {
8010 struct gaudi_device *gaudi = hdev->asic_specific;
8011 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
8012 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
8013 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
8014 unsigned long *mask = (unsigned long *)mask_arr;
8015 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
8016 bool is_idle = true, is_eng_idle, is_slave;
8017 u64 offset;
8018 int i, dma_id, port;
8019
8020 if (e)
8021 hl_engine_data_sprintf(e,
8022 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
8023 "--- ------- ------------ ---------- -------------\n");
8024
8025 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
8026 dma_id = gaudi_dma_assignment[i];
8027 offset = dma_id * DMA_QMAN_OFFSET;
8028
8029 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
8030 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
8031 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
8032 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8033 IS_DMA_IDLE(dma_core_sts0);
8034 is_idle &= is_eng_idle;
8035
8036 if (mask && !is_eng_idle)
8037 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
8038 if (e)
8039 hl_engine_data_sprintf(e, fmt, dma_id,
8040 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
8041 qm_cgm_sts, dma_core_sts0);
8042 }
8043
8044 if (e)
8045 hl_engine_data_sprintf(e,
8046 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
8047 "--- ------- ------------ ---------- ----------\n");
8048
8049 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
8050 offset = i * TPC_QMAN_OFFSET;
8051 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
8052 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
8053 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
8054 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
8055 IS_TPC_IDLE(tpc_cfg_sts);
8056 is_idle &= is_eng_idle;
8057
8058 if (mask && !is_eng_idle)
8059 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
8060 if (e)
8061 hl_engine_data_sprintf(e, fmt, i,
8062 is_eng_idle ? "Y" : "N",
8063 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
8064 }
8065
8066 if (e)
8067 hl_engine_data_sprintf(e,
8068 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
8069 "--- ------- ------------ ---------- -----------\n");
8070
8071 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
8072 offset = i * MME_QMAN_OFFSET;
8073 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
8074 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
8075
8076 /* MME 1 & 3 are slaves, no need to check their QMANs */
8077 is_slave = i % 2;
8078 if (!is_slave) {
8079 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
8080 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
8081 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8082 }
8083
8084 is_idle &= is_eng_idle;
8085
8086 if (mask && !is_eng_idle)
8087 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
8088 if (e) {
8089 if (!is_slave)
8090 hl_engine_data_sprintf(e, fmt, i,
8091 is_eng_idle ? "Y" : "N",
8092 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
8093 else
8094 hl_engine_data_sprintf(e, mme_slave_fmt, i,
8095 is_eng_idle ? "Y" : "N", "-",
8096 "-", mme_arch_sts);
8097 }
8098 }
8099
8100 if (e)
8101 hl_engine_data_sprintf(e,
8102 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
8103 "--- ------- ------------ ----------\n");
8104
8105 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
8106 offset = i * NIC_MACRO_QMAN_OFFSET;
8107 port = 2 * i;
8108 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8109 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
8110 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
8111 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8112 is_idle &= is_eng_idle;
8113
8114 if (mask && !is_eng_idle)
8115 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8116 if (e)
8117 hl_engine_data_sprintf(e, nic_fmt, port,
8118 is_eng_idle ? "Y" : "N",
8119 qm_glbl_sts0, qm_cgm_sts);
8120 }
8121
8122 port = 2 * i + 1;
8123 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
8124 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
8125 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
8126 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
8127 is_idle &= is_eng_idle;
8128
8129 if (mask && !is_eng_idle)
8130 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
8131 if (e)
8132 hl_engine_data_sprintf(e, nic_fmt, port,
8133 is_eng_idle ? "Y" : "N",
8134 qm_glbl_sts0, qm_cgm_sts);
8135 }
8136 }
8137
8138 if (e)
8139 hl_engine_data_sprintf(e, "\n");
8140
8141 return is_idle;
8142 }
8143
gaudi_hw_queues_lock(struct hl_device * hdev)8144 static void gaudi_hw_queues_lock(struct hl_device *hdev)
8145 __acquires(&gaudi->hw_queues_lock)
8146 {
8147 struct gaudi_device *gaudi = hdev->asic_specific;
8148
8149 spin_lock(&gaudi->hw_queues_lock);
8150 }
8151
gaudi_hw_queues_unlock(struct hl_device * hdev)8152 static void gaudi_hw_queues_unlock(struct hl_device *hdev)
8153 __releases(&gaudi->hw_queues_lock)
8154 {
8155 struct gaudi_device *gaudi = hdev->asic_specific;
8156
8157 spin_unlock(&gaudi->hw_queues_lock);
8158 }
8159
gaudi_get_pci_id(struct hl_device * hdev)8160 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8161 {
8162 return hdev->pdev->device;
8163 }
8164
gaudi_get_eeprom_data(struct hl_device * hdev,void * data,size_t max_size)8165 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
8166 size_t max_size)
8167 {
8168 struct gaudi_device *gaudi = hdev->asic_specific;
8169
8170 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8171 return 0;
8172
8173 return hl_fw_get_eeprom_data(hdev, data, max_size);
8174 }
8175
gaudi_get_monitor_dump(struct hl_device * hdev,void * data)8176 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8177 {
8178 struct gaudi_device *gaudi = hdev->asic_specific;
8179
8180 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8181 return 0;
8182
8183 return hl_fw_get_monitor_dump(hdev, data);
8184 }
8185
8186 /*
8187 * this function should be used only during initialization and/or after reset,
8188 * when there are no active users.
8189 */
gaudi_run_tpc_kernel(struct hl_device * hdev,u64 tpc_kernel,u32 tpc_id)8190 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
8191 {
8192 u64 kernel_timeout;
8193 u32 status, offset;
8194 int rc;
8195
8196 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
8197
8198 if (hdev->pldm)
8199 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
8200 else
8201 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
8202
8203 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
8204 lower_32_bits(tpc_kernel));
8205 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
8206 upper_32_bits(tpc_kernel));
8207
8208 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
8209 lower_32_bits(tpc_kernel));
8210 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
8211 upper_32_bits(tpc_kernel));
8212 /* set a valid LUT pointer, content is of no significance */
8213 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
8214 lower_32_bits(tpc_kernel));
8215 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
8216 upper_32_bits(tpc_kernel));
8217
8218 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
8219 lower_32_bits(CFG_BASE +
8220 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
8221
8222 WREG32(mmTPC0_CFG_TPC_CMD + offset,
8223 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
8224 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
8225 /* wait a bit for the engine to start executing */
8226 usleep_range(1000, 1500);
8227
8228 /* wait until engine has finished executing */
8229 rc = hl_poll_timeout(
8230 hdev,
8231 mmTPC0_CFG_STATUS + offset,
8232 status,
8233 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8234 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8235 1000,
8236 kernel_timeout);
8237
8238 if (rc) {
8239 dev_err(hdev->dev,
8240 "Timeout while waiting for TPC%d icache prefetch\n",
8241 tpc_id);
8242 return -EIO;
8243 }
8244
8245 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
8246 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
8247
8248 /* wait a bit for the engine to start executing */
8249 usleep_range(1000, 1500);
8250
8251 /* wait until engine has finished executing */
8252 rc = hl_poll_timeout(
8253 hdev,
8254 mmTPC0_CFG_STATUS + offset,
8255 status,
8256 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
8257 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
8258 1000,
8259 kernel_timeout);
8260
8261 if (rc) {
8262 dev_err(hdev->dev,
8263 "Timeout while waiting for TPC%d vector pipe\n",
8264 tpc_id);
8265 return -EIO;
8266 }
8267
8268 rc = hl_poll_timeout(
8269 hdev,
8270 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
8271 status,
8272 (status == 0),
8273 1000,
8274 kernel_timeout);
8275
8276 if (rc) {
8277 dev_err(hdev->dev,
8278 "Timeout while waiting for TPC%d kernel to execute\n",
8279 tpc_id);
8280 return -EIO;
8281 }
8282
8283 return 0;
8284 }
8285
gaudi_internal_cb_pool_init(struct hl_device * hdev,struct hl_ctx * ctx)8286 static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
8287 struct hl_ctx *ctx)
8288 {
8289 struct gaudi_device *gaudi = hdev->asic_specific;
8290 int min_alloc_order, rc, collective_cb_size;
8291
8292 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8293 return 0;
8294
8295 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
8296 HOST_SPACE_INTERNAL_CB_SZ,
8297 &hdev->internal_cb_pool_dma_addr,
8298 GFP_KERNEL | __GFP_ZERO);
8299
8300 if (!hdev->internal_cb_pool_virt_addr)
8301 return -ENOMEM;
8302
8303 collective_cb_size = sizeof(struct packet_msg_short) * 5 +
8304 sizeof(struct packet_fence);
8305 min_alloc_order = ilog2(collective_cb_size);
8306
8307 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
8308 if (!hdev->internal_cb_pool) {
8309 dev_err(hdev->dev,
8310 "Failed to create internal CB pool\n");
8311 rc = -ENOMEM;
8312 goto free_internal_cb_pool;
8313 }
8314
8315 rc = gen_pool_add(hdev->internal_cb_pool,
8316 (uintptr_t) hdev->internal_cb_pool_virt_addr,
8317 HOST_SPACE_INTERNAL_CB_SZ, -1);
8318 if (rc) {
8319 dev_err(hdev->dev,
8320 "Failed to add memory to internal CB pool\n");
8321 rc = -EFAULT;
8322 goto destroy_internal_cb_pool;
8323 }
8324
8325 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
8326 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
8327 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
8328
8329 if (!hdev->internal_cb_va_base) {
8330 rc = -ENOMEM;
8331 goto destroy_internal_cb_pool;
8332 }
8333
8334 mutex_lock(&hdev->mmu_lock);
8335
8336 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
8337 hdev->internal_cb_pool_dma_addr,
8338 HOST_SPACE_INTERNAL_CB_SZ);
8339 if (rc)
8340 goto unreserve_internal_cb_pool;
8341
8342 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
8343 if (rc)
8344 goto unmap_internal_cb_pool;
8345
8346 mutex_unlock(&hdev->mmu_lock);
8347
8348 return 0;
8349
8350 unmap_internal_cb_pool:
8351 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8352 HOST_SPACE_INTERNAL_CB_SZ);
8353 unreserve_internal_cb_pool:
8354 mutex_unlock(&hdev->mmu_lock);
8355 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8356 HOST_SPACE_INTERNAL_CB_SZ);
8357 destroy_internal_cb_pool:
8358 gen_pool_destroy(hdev->internal_cb_pool);
8359 free_internal_cb_pool:
8360 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8361 hdev->internal_cb_pool_dma_addr);
8362
8363 return rc;
8364 }
8365
gaudi_internal_cb_pool_fini(struct hl_device * hdev,struct hl_ctx * ctx)8366 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
8367 struct hl_ctx *ctx)
8368 {
8369 struct gaudi_device *gaudi = hdev->asic_specific;
8370
8371 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
8372 return;
8373
8374 mutex_lock(&hdev->mmu_lock);
8375 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
8376 HOST_SPACE_INTERNAL_CB_SZ);
8377 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
8378 HOST_SPACE_INTERNAL_CB_SZ);
8379 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
8380 mutex_unlock(&hdev->mmu_lock);
8381
8382 gen_pool_destroy(hdev->internal_cb_pool);
8383
8384 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
8385 hdev->internal_cb_pool_dma_addr);
8386 }
8387
gaudi_ctx_init(struct hl_ctx * ctx)8388 static int gaudi_ctx_init(struct hl_ctx *ctx)
8389 {
8390 int rc;
8391
8392 if (ctx->asid == HL_KERNEL_ASID_ID)
8393 return 0;
8394
8395 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8396 if (rc)
8397 return rc;
8398
8399 rc = gaudi_restore_user_registers(ctx->hdev);
8400 if (rc)
8401 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8402
8403 return rc;
8404 }
8405
gaudi_ctx_fini(struct hl_ctx * ctx)8406 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8407 {
8408 if (ctx->asid == HL_KERNEL_ASID_ID)
8409 return;
8410
8411 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8412 }
8413
gaudi_pre_schedule_cs(struct hl_cs * cs)8414 static int gaudi_pre_schedule_cs(struct hl_cs *cs)
8415 {
8416 return 0;
8417 }
8418
gaudi_get_queue_id_for_cq(struct hl_device * hdev,u32 cq_idx)8419 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
8420 {
8421 return gaudi_cq_assignment[cq_idx];
8422 }
8423
gaudi_get_signal_cb_size(struct hl_device * hdev)8424 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8425 {
8426 return sizeof(struct packet_msg_short) +
8427 sizeof(struct packet_msg_prot) * 2;
8428 }
8429
gaudi_get_wait_cb_size(struct hl_device * hdev)8430 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8431 {
8432 return sizeof(struct packet_msg_short) * 4 +
8433 sizeof(struct packet_fence) +
8434 sizeof(struct packet_msg_prot) * 2;
8435 }
8436
gaudi_get_sob_addr(struct hl_device * hdev,u32 sob_id)8437 static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
8438 {
8439 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
8440 }
8441
gaudi_gen_signal_cb(struct hl_device * hdev,void * data,u16 sob_id,u32 size,bool eb)8442 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
8443 u32 size, bool eb)
8444 {
8445 struct hl_cb *cb = (struct hl_cb *) data;
8446 struct packet_msg_short *pkt;
8447 u32 value, ctl, pkt_size = sizeof(*pkt);
8448
8449 pkt = cb->kernel_address + size;
8450 memset(pkt, 0, pkt_size);
8451
8452 /* Inc by 1, Mode ADD */
8453 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
8454 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
8455
8456 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
8457 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8458 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
8459 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8460 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
8461 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8462 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8463
8464 pkt->value = cpu_to_le32(value);
8465 pkt->ctl = cpu_to_le32(ctl);
8466
8467 return size + pkt_size;
8468 }
8469
gaudi_add_mon_msg_short(struct packet_msg_short * pkt,u32 value,u16 addr)8470 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
8471 u16 addr)
8472 {
8473 u32 ctl, pkt_size = sizeof(*pkt);
8474
8475 memset(pkt, 0, pkt_size);
8476
8477 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
8478 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8479 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8480 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8481 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8482 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
8483
8484 pkt->value = cpu_to_le32(value);
8485 pkt->ctl = cpu_to_le32(ctl);
8486
8487 return pkt_size;
8488 }
8489
gaudi_add_arm_monitor_pkt(struct hl_device * hdev,struct packet_msg_short * pkt,u16 sob_base,u8 sob_mask,u16 sob_val,u16 mon_id)8490 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
8491 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
8492 u16 sob_val, u16 mon_id)
8493 {
8494 u64 monitor_base;
8495 u32 ctl, value, pkt_size = sizeof(*pkt);
8496 u16 msg_addr_offset;
8497 u8 mask;
8498
8499 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
8500 dev_err(hdev->dev,
8501 "sob_base %u (mask %#x) is not valid\n",
8502 sob_base, sob_mask);
8503 return 0;
8504 }
8505
8506 /*
8507 * monitor_base should be the content of the base0 address registers,
8508 * so it will be added to the msg short offsets
8509 */
8510 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8511
8512 msg_addr_offset =
8513 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
8514 monitor_base;
8515
8516 memset(pkt, 0, pkt_size);
8517
8518 /* Monitor config packet: bind the monitor to a sync object */
8519 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
8520 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
8521 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
8522 0); /* GREATER OR EQUAL*/
8523 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);
8524
8525 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
8526 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
8527 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
8528 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
8529 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8530 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8531 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8532
8533 pkt->value = cpu_to_le32(value);
8534 pkt->ctl = cpu_to_le32(ctl);
8535
8536 return pkt_size;
8537 }
8538
gaudi_add_fence_pkt(struct packet_fence * pkt)8539 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8540 {
8541 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8542
8543 memset(pkt, 0, pkt_size);
8544
8545 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8546 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8547 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8548
8549 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8550 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8551 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8552 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8553
8554 pkt->cfg = cpu_to_le32(cfg);
8555 pkt->ctl = cpu_to_le32(ctl);
8556
8557 return pkt_size;
8558 }
8559
gaudi_get_fence_addr(struct hl_device * hdev,u32 queue_id,u64 * addr)8560 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
8561 {
8562 u32 offset, nic_index;
8563
8564 switch (queue_id) {
8565 case GAUDI_QUEUE_ID_DMA_0_0:
8566 offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
8567 break;
8568 case GAUDI_QUEUE_ID_DMA_0_1:
8569 offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
8570 break;
8571 case GAUDI_QUEUE_ID_DMA_0_2:
8572 offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
8573 break;
8574 case GAUDI_QUEUE_ID_DMA_0_3:
8575 offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
8576 break;
8577 case GAUDI_QUEUE_ID_DMA_1_0:
8578 offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
8579 break;
8580 case GAUDI_QUEUE_ID_DMA_1_1:
8581 offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
8582 break;
8583 case GAUDI_QUEUE_ID_DMA_1_2:
8584 offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
8585 break;
8586 case GAUDI_QUEUE_ID_DMA_1_3:
8587 offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
8588 break;
8589 case GAUDI_QUEUE_ID_DMA_5_0:
8590 offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
8591 break;
8592 case GAUDI_QUEUE_ID_DMA_5_1:
8593 offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
8594 break;
8595 case GAUDI_QUEUE_ID_DMA_5_2:
8596 offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
8597 break;
8598 case GAUDI_QUEUE_ID_DMA_5_3:
8599 offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
8600 break;
8601 case GAUDI_QUEUE_ID_TPC_7_0:
8602 offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
8603 break;
8604 case GAUDI_QUEUE_ID_TPC_7_1:
8605 offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
8606 break;
8607 case GAUDI_QUEUE_ID_TPC_7_2:
8608 offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
8609 break;
8610 case GAUDI_QUEUE_ID_TPC_7_3:
8611 offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
8612 break;
8613 case GAUDI_QUEUE_ID_NIC_0_0:
8614 case GAUDI_QUEUE_ID_NIC_1_0:
8615 case GAUDI_QUEUE_ID_NIC_2_0:
8616 case GAUDI_QUEUE_ID_NIC_3_0:
8617 case GAUDI_QUEUE_ID_NIC_4_0:
8618 case GAUDI_QUEUE_ID_NIC_5_0:
8619 case GAUDI_QUEUE_ID_NIC_6_0:
8620 case GAUDI_QUEUE_ID_NIC_7_0:
8621 case GAUDI_QUEUE_ID_NIC_8_0:
8622 case GAUDI_QUEUE_ID_NIC_9_0:
8623 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
8624 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
8625 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8626 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8627 break;
8628 case GAUDI_QUEUE_ID_NIC_0_1:
8629 case GAUDI_QUEUE_ID_NIC_1_1:
8630 case GAUDI_QUEUE_ID_NIC_2_1:
8631 case GAUDI_QUEUE_ID_NIC_3_1:
8632 case GAUDI_QUEUE_ID_NIC_4_1:
8633 case GAUDI_QUEUE_ID_NIC_5_1:
8634 case GAUDI_QUEUE_ID_NIC_6_1:
8635 case GAUDI_QUEUE_ID_NIC_7_1:
8636 case GAUDI_QUEUE_ID_NIC_8_1:
8637 case GAUDI_QUEUE_ID_NIC_9_1:
8638 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
8639 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
8640 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8641 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8642 break;
8643 case GAUDI_QUEUE_ID_NIC_0_2:
8644 case GAUDI_QUEUE_ID_NIC_1_2:
8645 case GAUDI_QUEUE_ID_NIC_2_2:
8646 case GAUDI_QUEUE_ID_NIC_3_2:
8647 case GAUDI_QUEUE_ID_NIC_4_2:
8648 case GAUDI_QUEUE_ID_NIC_5_2:
8649 case GAUDI_QUEUE_ID_NIC_6_2:
8650 case GAUDI_QUEUE_ID_NIC_7_2:
8651 case GAUDI_QUEUE_ID_NIC_8_2:
8652 case GAUDI_QUEUE_ID_NIC_9_2:
8653 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
8654 offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
8655 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8656 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8657 break;
8658 case GAUDI_QUEUE_ID_NIC_0_3:
8659 case GAUDI_QUEUE_ID_NIC_1_3:
8660 case GAUDI_QUEUE_ID_NIC_2_3:
8661 case GAUDI_QUEUE_ID_NIC_3_3:
8662 case GAUDI_QUEUE_ID_NIC_4_3:
8663 case GAUDI_QUEUE_ID_NIC_5_3:
8664 case GAUDI_QUEUE_ID_NIC_6_3:
8665 case GAUDI_QUEUE_ID_NIC_7_3:
8666 case GAUDI_QUEUE_ID_NIC_8_3:
8667 case GAUDI_QUEUE_ID_NIC_9_3:
8668 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
8669 offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
8670 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
8671 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
8672 break;
8673 default:
8674 return -EINVAL;
8675 }
8676
8677 *addr = CFG_BASE + offset;
8678
8679 return 0;
8680 }
8681
gaudi_add_mon_pkts(void * buf,u16 mon_id,u64 fence_addr)8682 static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
8683 {
8684 u64 monitor_base;
8685 u32 size = 0;
8686 u16 msg_addr_offset;
8687
8688 /*
8689 * monitor_base should be the content of the base0 address registers,
8690 * so it will be added to the msg short offsets
8691 */
8692 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
8693
8694 /* First monitor config packet: low address of the sync */
8695 msg_addr_offset =
8696 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
8697 monitor_base;
8698
8699 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
8700 msg_addr_offset);
8701
8702 /* Second monitor config packet: high address of the sync */
8703 msg_addr_offset =
8704 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
8705 monitor_base;
8706
8707 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
8708 msg_addr_offset);
8709
8710 /*
8711 * Third monitor config packet: the payload, i.e. what to write when the
8712 * sync triggers
8713 */
8714 msg_addr_offset =
8715 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
8716 monitor_base;
8717
8718 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
8719
8720 return size;
8721 }
8722
gaudi_gen_wait_cb(struct hl_device * hdev,struct hl_gen_wait_properties * prop)8723 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
8724 struct hl_gen_wait_properties *prop)
8725 {
8726 struct hl_cb *cb = (struct hl_cb *) prop->data;
8727 void *buf = cb->kernel_address;
8728 u64 fence_addr = 0;
8729 u32 size = prop->size;
8730
8731 if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
8732 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
8733 prop->q_idx);
8734 return 0;
8735 }
8736
8737 size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
8738 size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
8739 prop->sob_mask, prop->sob_val, prop->mon_id);
8740 size += gaudi_add_fence_pkt(buf + size);
8741
8742 return size;
8743 }
8744
gaudi_reset_sob(struct hl_device * hdev,void * data)8745 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
8746 {
8747 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
8748
8749 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
8750 hw_sob->sob_id);
8751
8752 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
8753 hw_sob->sob_id * 4, 0);
8754
8755 kref_init(&hw_sob->kref);
8756 }
8757
gaudi_get_device_time(struct hl_device * hdev)8758 static u64 gaudi_get_device_time(struct hl_device *hdev)
8759 {
8760 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8761
8762 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8763 }
8764
gaudi_get_hw_block_id(struct hl_device * hdev,u64 block_addr,u32 * block_size,u32 * block_id)8765 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
8766 u32 *block_size, u32 *block_id)
8767 {
8768 return -EPERM;
8769 }
8770
gaudi_block_mmap(struct hl_device * hdev,struct vm_area_struct * vma,u32 block_id,u32 block_size)8771 static int gaudi_block_mmap(struct hl_device *hdev,
8772 struct vm_area_struct *vma,
8773 u32 block_id, u32 block_size)
8774 {
8775 return -EPERM;
8776 }
8777
gaudi_enable_events_from_fw(struct hl_device * hdev)8778 static void gaudi_enable_events_from_fw(struct hl_device *hdev)
8779 {
8780 struct cpu_dyn_regs *dyn_regs =
8781 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
8782 u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
8783 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
8784 le32_to_cpu(dyn_regs->gic_host_ints_irq);
8785
8786 WREG32(irq_handler_offset,
8787 gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
8788 }
8789
gaudi_ack_mmu_page_fault_or_access_error(struct hl_device * hdev,u64 mmu_cap_mask)8790 static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
8791 {
8792 return -EINVAL;
8793 }
8794
gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)8795 static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
8796 {
8797 switch (pll_idx) {
8798 case HL_GAUDI_CPU_PLL: return CPU_PLL;
8799 case HL_GAUDI_PCI_PLL: return PCI_PLL;
8800 case HL_GAUDI_NIC_PLL: return NIC_PLL;
8801 case HL_GAUDI_DMA_PLL: return DMA_PLL;
8802 case HL_GAUDI_MESH_PLL: return MESH_PLL;
8803 case HL_GAUDI_MME_PLL: return MME_PLL;
8804 case HL_GAUDI_TPC_PLL: return TPC_PLL;
8805 case HL_GAUDI_IF_PLL: return IF_PLL;
8806 case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
8807 case HL_GAUDI_HBM_PLL: return HBM_PLL;
8808 default: return -EINVAL;
8809 }
8810 }
8811
gaudi_add_sync_to_engine_map_entry(struct hl_sync_to_engine_map * map,u32 reg_value,enum hl_sync_engine_type engine_type,u32 engine_id)8812 static int gaudi_add_sync_to_engine_map_entry(
8813 struct hl_sync_to_engine_map *map, u32 reg_value,
8814 enum hl_sync_engine_type engine_type, u32 engine_id)
8815 {
8816 struct hl_sync_to_engine_map_entry *entry;
8817
8818 /* Reg value represents a partial address of sync object,
8819 * it is used as unique identifier. For this we need to
8820 * clear the cutoff cfg base bits from the value.
8821 */
8822 if (reg_value == 0 || reg_value == 0xffffffff)
8823 return 0;
8824 reg_value -= lower_32_bits(CFG_BASE);
8825
8826 /* create a new hash entry */
8827 entry = kzalloc(sizeof(*entry), GFP_KERNEL);
8828 if (!entry)
8829 return -ENOMEM;
8830 entry->engine_type = engine_type;
8831 entry->engine_id = engine_id;
8832 entry->sync_id = reg_value;
8833 hash_add(map->tb, &entry->node, reg_value);
8834
8835 return 0;
8836 }
8837
gaudi_gen_sync_to_engine_map(struct hl_device * hdev,struct hl_sync_to_engine_map * map)8838 static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
8839 struct hl_sync_to_engine_map *map)
8840 {
8841 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8842 int i, j, rc;
8843 u32 reg_value;
8844
8845 /* Iterate over TPC engines */
8846 for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
8847
8848 reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
8849 sds->props[SP_NEXT_TPC] * i);
8850
8851 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8852 ENGINE_TPC, i);
8853 if (rc)
8854 goto free_sync_to_engine_map;
8855 }
8856
8857 /* Iterate over MME engines */
8858 for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
8859 for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
8860
8861 reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
8862 sds->props[SP_NEXT_MME] * i +
8863 j * sizeof(u32));
8864
8865 rc = gaudi_add_sync_to_engine_map_entry(
8866 map, reg_value, ENGINE_MME,
8867 i * sds->props[SP_SUB_MME_ENG_NUM] + j);
8868 if (rc)
8869 goto free_sync_to_engine_map;
8870 }
8871 }
8872
8873 /* Iterate over DMA engines */
8874 for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
8875 reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
8876 sds->props[SP_DMA_QUEUES_OFFSET] * i);
8877 rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
8878 ENGINE_DMA, i);
8879 if (rc)
8880 goto free_sync_to_engine_map;
8881 }
8882
8883 return 0;
8884
8885 free_sync_to_engine_map:
8886 hl_state_dump_free_sync_to_engine_map(map);
8887
8888 return rc;
8889 }
8890
gaudi_monitor_valid(struct hl_mon_state_dump * mon)8891 static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
8892 {
8893 return FIELD_GET(
8894 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
8895 mon->status);
8896 }
8897
gaudi_fill_sobs_from_mon(char * sobs,struct hl_mon_state_dump * mon)8898 static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
8899 {
8900 const size_t max_write = 10;
8901 u32 gid, mask, sob;
8902 int i, offset;
8903
8904 /* Sync object ID is calculated as follows:
8905 * (8 * group_id + cleared bits in mask)
8906 */
8907 gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8908 mon->arm_data);
8909 mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8910 mon->arm_data);
8911
8912 for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
8913 max_write; mask >>= 1, i++) {
8914 if (!(mask & 1)) {
8915 sob = gid * MONITOR_MAX_SOBS + i;
8916
8917 if (offset > 0)
8918 offset += snprintf(sobs + offset, max_write,
8919 ", ");
8920
8921 offset += snprintf(sobs + offset, max_write, "%u", sob);
8922 }
8923 }
8924 }
8925
gaudi_print_single_monitor(char ** buf,size_t * size,size_t * offset,struct hl_device * hdev,struct hl_mon_state_dump * mon)8926 static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
8927 struct hl_device *hdev,
8928 struct hl_mon_state_dump *mon)
8929 {
8930 const char *name;
8931 char scratch_buf1[BIN_REG_STRING_SIZE],
8932 scratch_buf2[BIN_REG_STRING_SIZE];
8933 char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
8934
8935 name = hl_state_dump_get_monitor_name(hdev, mon);
8936 if (!name)
8937 name = "";
8938
8939 gaudi_fill_sobs_from_mon(monitored_sobs, mon);
8940
8941 return hl_snprintf_resize(
8942 buf, size, offset,
8943 "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
8944 mon->id, name,
8945 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
8946 mon->arm_data),
8947 hl_format_as_binary(
8948 scratch_buf1, sizeof(scratch_buf1),
8949 FIELD_GET(
8950 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
8951 mon->arm_data)),
8952 FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
8953 mon->arm_data),
8954 mon->wr_data,
8955 (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
8956 hl_format_as_binary(
8957 scratch_buf2, sizeof(scratch_buf2),
8958 FIELD_GET(
8959 SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
8960 mon->status)),
8961 monitored_sobs);
8962 }
8963
8964
gaudi_print_fences_single_engine(struct hl_device * hdev,u64 base_offset,u64 status_base_offset,enum hl_sync_engine_type engine_type,u32 engine_id,char ** buf,size_t * size,size_t * offset)8965 static int gaudi_print_fences_single_engine(
8966 struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
8967 enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
8968 size_t *size, size_t *offset)
8969 {
8970 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
8971 int rc = -ENOMEM, i;
8972 u32 *statuses, *fences;
8973
8974 statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
8975 sizeof(*statuses), GFP_KERNEL);
8976 if (!statuses)
8977 goto out;
8978
8979 fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
8980 sds->props[SP_ENGINE_NUM_OF_QUEUES],
8981 sizeof(*fences), GFP_KERNEL);
8982 if (!fences)
8983 goto free_status;
8984
8985 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
8986 statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
8987
8988 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
8989 sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
8990 fences[i] = RREG32(base_offset + i * sizeof(u32));
8991
8992 /* The actual print */
8993 for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
8994 u32 fence_id;
8995 u64 fence_cnt, fence_rdata;
8996 const char *engine_name;
8997
8998 if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
8999 statuses[i]))
9000 continue;
9001
9002 fence_id =
9003 FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
9004 fence_cnt = base_offset + CFG_BASE +
9005 sizeof(u32) *
9006 (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
9007 fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
9008 sds->props[SP_FENCE0_RDATA_OFFSET];
9009 engine_name = hl_sync_engine_to_string(engine_type);
9010
9011 rc = hl_snprintf_resize(
9012 buf, size, offset,
9013 "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
9014 engine_name, engine_id,
9015 i, fence_id,
9016 fence_cnt, engine_name, engine_id, fence_id, i,
9017 fence_rdata, engine_name, engine_id, fence_id, i,
9018 fences[fence_id],
9019 statuses[i]);
9020 if (rc)
9021 goto free_fences;
9022 }
9023
9024 rc = 0;
9025
9026 free_fences:
9027 kfree(fences);
9028 free_status:
9029 kfree(statuses);
9030 out:
9031 return rc;
9032 }
9033
9034
9035 static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
9036 .monitor_valid = gaudi_monitor_valid,
9037 .print_single_monitor = gaudi_print_single_monitor,
9038 .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
9039 .print_fences_single_engine = gaudi_print_fences_single_engine,
9040 };
9041
gaudi_state_dump_init(struct hl_device * hdev)9042 static void gaudi_state_dump_init(struct hl_device *hdev)
9043 {
9044 struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
9045 int i;
9046
9047 for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
9048 hash_add(sds->so_id_to_str_tb,
9049 &gaudi_so_id_to_str[i].node,
9050 gaudi_so_id_to_str[i].id);
9051
9052 for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
9053 hash_add(sds->monitor_id_to_str_tb,
9054 &gaudi_monitor_id_to_str[i].node,
9055 gaudi_monitor_id_to_str[i].id);
9056
9057 sds->props = gaudi_state_dump_specs_props;
9058
9059 sds->sync_namager_names = gaudi_sync_manager_names;
9060
9061 sds->funcs = gaudi_state_dump_funcs;
9062 }
9063
gaudi_get_stream_master_qid_arr(void)9064 static u32 *gaudi_get_stream_master_qid_arr(void)
9065 {
9066 return gaudi_stream_master;
9067 }
9068
gaudi_set_dram_properties(struct hl_device * hdev)9069 static int gaudi_set_dram_properties(struct hl_device *hdev)
9070 {
9071 return 0;
9072 }
9073
gaudi_set_binning_masks(struct hl_device * hdev)9074 static int gaudi_set_binning_masks(struct hl_device *hdev)
9075 {
9076 return 0;
9077 }
9078
gaudi_check_if_razwi_happened(struct hl_device * hdev)9079 static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
9080 {
9081 }
9082
infineon_ver_show(struct device * dev,struct device_attribute * attr,char * buf)9083 static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
9084 {
9085 struct hl_device *hdev = dev_get_drvdata(dev);
9086 struct cpucp_info *cpucp_info;
9087
9088 cpucp_info = &hdev->asic_prop.cpucp_info;
9089
9090 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
9091 }
9092
9093 static DEVICE_ATTR_RO(infineon_ver);
9094
9095 static struct attribute *gaudi_vrm_dev_attrs[] = {
9096 &dev_attr_infineon_ver.attr,
9097 NULL,
9098 };
9099
gaudi_add_device_attr(struct hl_device * hdev,struct attribute_group * dev_clk_attr_grp,struct attribute_group * dev_vrm_attr_grp)9100 static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
9101 struct attribute_group *dev_vrm_attr_grp)
9102 {
9103 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
9104 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
9105 }
9106
gaudi_send_device_activity(struct hl_device * hdev,bool open)9107 static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
9108 {
9109 return 0;
9110 }
9111
9112 static const struct hl_asic_funcs gaudi_funcs = {
9113 .early_init = gaudi_early_init,
9114 .early_fini = gaudi_early_fini,
9115 .late_init = gaudi_late_init,
9116 .late_fini = gaudi_late_fini,
9117 .sw_init = gaudi_sw_init,
9118 .sw_fini = gaudi_sw_fini,
9119 .hw_init = gaudi_hw_init,
9120 .hw_fini = gaudi_hw_fini,
9121 .halt_engines = gaudi_halt_engines,
9122 .suspend = gaudi_suspend,
9123 .resume = gaudi_resume,
9124 .mmap = gaudi_mmap,
9125 .ring_doorbell = gaudi_ring_doorbell,
9126 .pqe_write = gaudi_pqe_write,
9127 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
9128 .asic_dma_free_coherent = gaudi_dma_free_coherent,
9129 .scrub_device_mem = gaudi_scrub_device_mem,
9130 .scrub_device_dram = gaudi_scrub_device_dram,
9131 .get_int_queue_base = gaudi_get_int_queue_base,
9132 .test_queues = gaudi_test_queues,
9133 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
9134 .asic_dma_pool_free = gaudi_dma_pool_free,
9135 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
9136 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
9137 .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
9138 .cs_parser = gaudi_cs_parser,
9139 .dma_map_sgtable = hl_asic_dma_map_sgtable,
9140 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
9141 .update_eq_ci = gaudi_update_eq_ci,
9142 .context_switch = gaudi_context_switch,
9143 .restore_phase_topology = gaudi_restore_phase_topology,
9144 .debugfs_read_dma = gaudi_debugfs_read_dma,
9145 .add_device_attr = gaudi_add_device_attr,
9146 .handle_eqe = gaudi_handle_eqe,
9147 .get_events_stat = gaudi_get_events_stat,
9148 .read_pte = gaudi_read_pte,
9149 .write_pte = gaudi_write_pte,
9150 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
9151 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
9152 .mmu_prefetch_cache_range = NULL,
9153 .send_heartbeat = gaudi_send_heartbeat,
9154 .debug_coresight = gaudi_debug_coresight,
9155 .is_device_idle = gaudi_is_device_idle,
9156 .compute_reset_late_init = gaudi_compute_reset_late_init,
9157 .hw_queues_lock = gaudi_hw_queues_lock,
9158 .hw_queues_unlock = gaudi_hw_queues_unlock,
9159 .get_pci_id = gaudi_get_pci_id,
9160 .get_eeprom_data = gaudi_get_eeprom_data,
9161 .get_monitor_dump = gaudi_get_monitor_dump,
9162 .send_cpu_message = gaudi_send_cpu_message,
9163 .pci_bars_map = gaudi_pci_bars_map,
9164 .init_iatu = gaudi_init_iatu,
9165 .rreg = hl_rreg,
9166 .wreg = hl_wreg,
9167 .halt_coresight = gaudi_halt_coresight,
9168 .ctx_init = gaudi_ctx_init,
9169 .ctx_fini = gaudi_ctx_fini,
9170 .pre_schedule_cs = gaudi_pre_schedule_cs,
9171 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
9172 .load_firmware_to_device = gaudi_load_firmware_to_device,
9173 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
9174 .get_signal_cb_size = gaudi_get_signal_cb_size,
9175 .get_wait_cb_size = gaudi_get_wait_cb_size,
9176 .gen_signal_cb = gaudi_gen_signal_cb,
9177 .gen_wait_cb = gaudi_gen_wait_cb,
9178 .reset_sob = gaudi_reset_sob,
9179 .reset_sob_group = gaudi_reset_sob_group,
9180 .get_device_time = gaudi_get_device_time,
9181 .pb_print_security_errors = NULL,
9182 .collective_wait_init_cs = gaudi_collective_wait_init_cs,
9183 .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
9184 .get_dec_base_addr = NULL,
9185 .scramble_addr = hl_mmu_scramble_addr,
9186 .descramble_addr = hl_mmu_descramble_addr,
9187 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
9188 .get_hw_block_id = gaudi_get_hw_block_id,
9189 .hw_block_mmap = gaudi_block_mmap,
9190 .enable_events_from_fw = gaudi_enable_events_from_fw,
9191 .ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
9192 .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
9193 .init_firmware_preload_params = gaudi_init_firmware_preload_params,
9194 .init_firmware_loader = gaudi_init_firmware_loader,
9195 .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
9196 .state_dump_init = gaudi_state_dump_init,
9197 .get_sob_addr = gaudi_get_sob_addr,
9198 .set_pci_memory_regions = gaudi_set_pci_memory_regions,
9199 .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
9200 .check_if_razwi_happened = gaudi_check_if_razwi_happened,
9201 .mmu_get_real_page_size = hl_mmu_get_real_page_size,
9202 .access_dev_mem = hl_access_dev_mem,
9203 .set_dram_bar_base = gaudi_set_hbm_bar_base,
9204 .send_device_activity = gaudi_send_device_activity,
9205 .set_dram_properties = gaudi_set_dram_properties,
9206 .set_binning_masks = gaudi_set_binning_masks,
9207 };
9208
9209 /**
9210 * gaudi_set_asic_funcs - set GAUDI function pointers
9211 *
9212 * @hdev: pointer to hl_device structure
9213 *
9214 */
gaudi_set_asic_funcs(struct hl_device * hdev)9215 void gaudi_set_asic_funcs(struct hl_device *hdev)
9216 {
9217 hdev->asic_funcs = &gaudi_funcs;
9218 }
9219