1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudiP.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 #include "../include/hw_ip/mmu/mmu_v1_1.h"
11 #include "../include/gaudi/gaudi_masks.h"
12 #include "../include/gaudi/gaudi_fw_if.h"
13 #include "../include/gaudi/gaudi_reg_map.h"
14 #include "../include/gaudi/gaudi_async_ids_map_extended.h"
15
16 #include <linux/module.h>
17 #include <linux/pci.h>
18 #include <linux/firmware.h>
19 #include <linux/hwmon.h>
20 #include <linux/iommu.h>
21 #include <linux/seq_file.h>
22
23 /*
24 * Gaudi security scheme:
25 *
26 * 1. Host is protected by:
27 * - Range registers
28 * - MMU
29 *
30 * 2. DDR is protected by:
31 * - Range registers (protect the first 512MB)
32 *
33 * 3. Configuration is protected by:
34 * - Range registers
35 * - Protection bits
36 *
37 * MMU is always enabled.
38 *
 * QMAN DMA channels 0,1 (PCI DMA):
40 * - DMA is not secured.
41 * - PQ and CQ are secured.
42 * - CP is secured: The driver needs to parse CB but WREG should be allowed
43 * because of TDMA (tensor DMA). Hence, WREG is always not
44 * secured.
45 *
46 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
47 * channel 0 to be secured, execute the DMA and change it back to not secured.
48 * Currently, the driver doesn't use the DMA while there are compute jobs
49 * running.
50 *
51 * The current use cases for the driver to use the DMA are:
52 * - Clear SRAM on context switch (happens on context switch when device is
53 * idle)
54 * - MMU page tables area clear (happens on init)
55 *
56 * QMAN DMA 2-7, TPC, MME, NIC:
57 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
58 * CQ, CP and the engine are not secured
59 *
60 */
61
62 #define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
63 #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
64 #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
65
66 MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
67 MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
68 MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
69
70 #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
71
72 #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
73 #define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
74 #define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
75 #define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
76
77 #define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
78 #define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
79 #define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
80 #define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
81 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
83 #define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 4000000 /* 4s */
84 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
85 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000 /* 15s */
86
87 #define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
88
89 #define GAUDI_MAX_STRING_LEN 20
90
91 #define GAUDI_CB_POOL_CB_CNT 512
92 #define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
93
94 #define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
95
96 #define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
97
98 #define GAUDI_NUM_OF_QM_ERR_CAUSE 16
99
100 #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
101
102 #define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */
103
104 #define HBM_SCRUBBING_TIMEOUT_US 1000000 /* 1s */
105
106 #define BIN_REG_STRING_SIZE sizeof("0b10101010101010101010101010101010")
107
108 #define MONITOR_SOB_STRING_SIZE 256
109
/*
 * Queue IDs of the stream-master queues: the four streams of PCI DMA
 * channels 0 and 1 (the externally-managed DMA queues).
 */
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};
120
/*
 * Maps each logical DMA role (PCI DMA 1-2, HBM DMA 1-6) to the physical
 * DMA engine ID that serves it.
 */
static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};
131
/*
 * Maps each completion queue index to the hardware queue it completes for —
 * one CQ per stream of the two PCI DMA channels.
 */
static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};
142
/*
 * Size in bytes of each QMAN packet type, indexed by packet ID. Used when
 * parsing command buffers to advance to the next packet.
 */
static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};
159
validate_packet_id(enum packet_id id)160 static inline bool validate_packet_id(enum packet_id id)
161 {
162 switch (id) {
163 case PACKET_WREG_32:
164 case PACKET_WREG_BULK:
165 case PACKET_MSG_LONG:
166 case PACKET_MSG_SHORT:
167 case PACKET_CP_DMA:
168 case PACKET_REPEAT:
169 case PACKET_MSG_PROT:
170 case PACKET_FENCE:
171 case PACKET_LIN_DMA:
172 case PACKET_NOP:
173 case PACKET_STOP:
174 case PACKET_ARB_POINT:
175 case PACKET_WAIT:
176 case PACKET_LOAD_AND_EXE:
177 return true;
178 default:
179 return false;
180 }
181 }
182
/* Human-readable cause strings for TPC interrupts, indexed by cause bit */
static const char * const
gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};
206
/* Human-readable cause strings for QMAN errors, indexed by cause bit */
static const char * const
gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
	"FENCE 3 inc over max value and clipped",
	"FENCE 0 dec under min value and clipped",
	"FENCE 1 dec under min value and clipped",
	"FENCE 2 dec under min value and clipped",
	"FENCE 3 dec under min value and clipped"
};
226
/* Human-readable cause strings for QMAN arbiter errors, indexed by cause bit */
static const char * const
gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
	"Choice push while full error",
	"Choice Q watchdog error",
	"MSG AXI LBW returned with error"
};
233
/*
 * Queue type per hardware queue ID (index matches enum gaudi_queue_id):
 * EXT for the PCI DMA queues, CPU for the CPU PQ, INT for all engine queues.
 */
static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
	QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
	QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};
349
/* Sync-object ID to symbolic name mapping, used by the state-dump code */
static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};
379
/* Monitor ID to symbolic name mapping, used by the state-dump code */
static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
393
/*
 * ASIC-specific constants (register addresses, offsets and counts) consumed
 * by the generic state-dump infrastructure, indexed by SP_* property ID.
 */
static s64 gaudi_state_dump_specs_props[] = {
	[SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
	[SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
	[SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
	[SP_MON_OBJ_WR_ADDR_LOW] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
	[SP_MON_OBJ_WR_ADDR_HIGH] =
		mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
	[SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
	[SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
	[SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
	[SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
	[SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
	[SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
	/* Per-engine strides are expressed as register-address deltas */
	[SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
	[SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
	[SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
	[SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
	[SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
	[SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
	[SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
	[SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
	[SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
	[SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
	[SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
	[SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
	[SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
	[SP_FENCE0_CNT_OFFSET] =
		mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_FENCE0_RDATA_OFFSET] =
		mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
	[SP_NUM_CORES] = 1,
};
429
/*
 * Maps every hardware queue ID to its owning engine ID. The CPU PQ has no
 * engine, so it maps to GAUDI_ENGINE_ID_SIZE (out-of-range sentinel).
 */
static const int gaudi_queue_id_to_engine_id[] = {
	[GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE,
	[GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};
461
462 /* The order here is opposite to the order of the indexing in the h/w.
463 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
464 */
/* NULL-terminated list of sync-manager names, in reverse of h/w index order */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};
472
/*
 * struct ecc_info_extract_params - parameters for extracting ECC error info
 * @block_address: base address of the h/w block to query
 * @num_memories: number of memories in the block
 * @derr: true for double-bit (uncorrectable) errors, false for single-bit
 */
struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};
478
479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
480 u64 phys_addr);
481 static int gaudi_send_job_on_qman0(struct hl_device *hdev,
482 struct hl_cs_job *job);
483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
484 u32 size, u64 val);
485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
486 u32 num_regs, u32 val);
487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
488 u32 tpc_id);
489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
490 static int gaudi_cpucp_info_get(struct hl_device *hdev);
491 static void gaudi_disable_clock_gating(struct hl_device *hdev);
492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
494 u32 size, bool eb);
495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
496 struct hl_gen_wait_properties *prop);
497 static inline enum hl_collective_mode
get_collective_mode(struct hl_device * hdev,u32 queue_id)498 get_collective_mode(struct hl_device *hdev, u32 queue_id)
499 {
500 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
501 return HL_COLLECTIVE_MASTER;
502
503 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
504 queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
505 return HL_COLLECTIVE_SLAVE;
506
507 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
508 queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
509 return HL_COLLECTIVE_SLAVE;
510
511 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
512 queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
513 return HL_COLLECTIVE_SLAVE;
514
515 return HL_COLLECTIVE_NOT_SUPPORTED;
516 }
517
set_default_power_values(struct hl_device * hdev)518 static inline void set_default_power_values(struct hl_device *hdev)
519 {
520 struct asic_fixed_properties *prop = &hdev->asic_prop;
521
522 if (hdev->card_type == cpucp_card_type_pmc) {
523 prop->max_power_default = MAX_POWER_DEFAULT_PMC;
524
525 if (prop->fw_security_enabled)
526 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
527 else
528 prop->dc_power_default = DC_POWER_DEFAULT_PMC;
529 } else {
530 prop->max_power_default = MAX_POWER_DEFAULT_PCI;
531 prop->dc_power_default = DC_POWER_DEFAULT_PCI;
532 }
533 }
534
gaudi_set_fixed_properties(struct hl_device * hdev)535 static int gaudi_set_fixed_properties(struct hl_device *hdev)
536 {
537 struct asic_fixed_properties *prop = &hdev->asic_prop;
538 u32 num_sync_stream_queues = 0;
539 int i;
540
541 prop->max_queues = GAUDI_QUEUE_ID_SIZE;
542 prop->hw_queues_props = kzalloc_objs(struct hw_queue_properties,
543 prop->max_queues);
544
545 if (!prop->hw_queues_props)
546 return -ENOMEM;
547
548 for (i = 0 ; i < prop->max_queues ; i++) {
549 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
550 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
551 prop->hw_queues_props[i].driver_only = 0;
552 prop->hw_queues_props[i].supports_sync_stream = 1;
553 prop->hw_queues_props[i].cb_alloc_flags =
554 CB_ALLOC_KERNEL;
555 num_sync_stream_queues++;
556 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
557 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
558 prop->hw_queues_props[i].driver_only = 1;
559 prop->hw_queues_props[i].supports_sync_stream = 0;
560 prop->hw_queues_props[i].cb_alloc_flags =
561 CB_ALLOC_KERNEL;
562 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
563 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
564 prop->hw_queues_props[i].driver_only = 0;
565 prop->hw_queues_props[i].supports_sync_stream = 0;
566 prop->hw_queues_props[i].cb_alloc_flags =
567 CB_ALLOC_USER;
568
569 }
570 prop->hw_queues_props[i].collective_mode =
571 get_collective_mode(hdev, i);
572 }
573
574 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
575 prop->cfg_base_address = CFG_BASE;
576 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
577 prop->host_base_address = HOST_PHYS_BASE;
578 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
579 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
580 prop->completion_mode = HL_COMPLETION_MODE_JOB;
581 prop->collective_first_sob = 0;
582 prop->collective_first_mon = 0;
583
584 /* 2 SOBs per internal queue stream are reserved for collective */
585 prop->sync_stream_first_sob =
586 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
587 * QMAN_STREAMS * HL_RSVD_SOBS;
588
589 /* 1 monitor per internal queue stream are reserved for collective
590 * 2 monitors per external queue stream are reserved for collective
591 */
592 prop->sync_stream_first_mon =
593 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
594 (NUMBER_OF_EXT_HW_QUEUES * 2);
595
596 prop->dram_base_address = DRAM_PHYS_BASE;
597 prop->dram_size = GAUDI_HBM_SIZE_32GB;
598 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
599 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
600
601 prop->sram_base_address = SRAM_BASE_ADDR;
602 prop->sram_size = SRAM_SIZE;
603 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
604 prop->sram_user_base_address =
605 prop->sram_base_address + SRAM_USER_BASE_OFFSET;
606
607 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
608 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;
609
610 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
611 if (hdev->pldm)
612 prop->mmu_pgt_size = 0x800000; /* 8MB */
613 else
614 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
615 prop->mmu_pte_size = HL_PTE_SIZE;
616 prop->dram_page_size = PAGE_SIZE_2MB;
617 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
618 prop->dram_supports_virtual_memory = false;
619
620 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
621 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
622 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
623 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
624 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
625 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
626 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
627 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
628 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
629 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
630 prop->pmmu.start_addr = VA_HOST_SPACE_START;
631 prop->pmmu.end_addr =
632 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
633 prop->pmmu.page_size = PAGE_SIZE_4KB;
634 prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
635 prop->pmmu.last_mask = LAST_MASK;
636 /* TODO: will be duplicated until implementing per-MMU props */
637 prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
638 prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
639
640 /* PMMU and HPMMU are the same except of page size */
641 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
642 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
643
644 /* shifts and masks are the same in PMMU and DMMU */
645 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
646 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
647 prop->dmmu.end_addr = VA_HOST_SPACE_END;
648 prop->dmmu.page_size = PAGE_SIZE_2MB;
649 prop->dmmu.pgt_size = prop->mmu_pgt_size;
650
651 prop->cfg_size = CFG_SIZE;
652 prop->max_asid = MAX_ASID;
653 prop->num_of_events = GAUDI_EVENT_SIZE;
654 prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
655 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
656
657 set_default_power_values(hdev);
658
659 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
660 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
661
662 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
663 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
664
665 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
666 CARD_NAME_MAX_LEN);
667
668 prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
669
670 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
671 prop->sync_stream_first_sob +
672 (num_sync_stream_queues * HL_RSVD_SOBS);
673 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
674 prop->sync_stream_first_mon +
675 (num_sync_stream_queues * HL_RSVD_MONS);
676
677 prop->first_available_user_interrupt = USHRT_MAX;
678 prop->tpc_interrupt_id = USHRT_MAX;
679
680 /* single msi */
681 prop->eq_interrupt_id = 0;
682
683 for (i = 0 ; i < HL_MAX_DCORES ; i++)
684 prop->first_available_cq[i] = USHRT_MAX;
685
686 prop->fw_cpu_boot_dev_sts0_valid = false;
687 prop->fw_cpu_boot_dev_sts1_valid = false;
688 prop->hard_reset_done_by_fw = false;
689 prop->gic_interrupts_enable = true;
690
691 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
692
693 prop->clk_pll_index = HL_GAUDI_MME_PLL;
694 prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
695
696 prop->use_get_power_for_reset_history = true;
697
698 prop->configurable_stop_on_err = true;
699
700 prop->set_max_power_on_device_init = true;
701
702 prop->dma_mask = 48;
703
704 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;
705
706 return 0;
707 }
708
gaudi_pci_bars_map(struct hl_device * hdev)709 static int gaudi_pci_bars_map(struct hl_device *hdev)
710 {
711 static const char * const name[] = {"SRAM", "CFG", "HBM"};
712 bool is_wc[3] = {false, false, true};
713 int rc;
714
715 rc = hl_pci_bars_map(hdev, name, is_wc);
716 if (rc)
717 return rc;
718
719 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
720 (CFG_BASE - SPI_FLASH_BASE_ADDR);
721
722 return 0;
723 }
724
/*
 * gaudi_set_hbm_bar_base - retarget the HBM BAR at a new device address
 *
 * Return: the previous BAR address on success (or @addr when the asic-specific
 * context is not yet available), U64_MAX if the iATU is FW-owned or the
 * inbound region update failed.
 */
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region region;
	u64 prev_addr = addr;
	int rc;

	/* Nothing to do if the BAR already points at the requested address */
	if (gaudi && gaudi->hbm_bar_cur_addr == addr)
		return prev_addr;

	/* When FW configured the iATU, the driver must not touch it */
	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	region.mode = PCI_BAR_MATCH_MODE;
	region.bar = HBM_BAR_ID;
	region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		prev_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return prev_addr;
}
753
gaudi_init_iatu(struct hl_device * hdev)754 static int gaudi_init_iatu(struct hl_device *hdev)
755 {
756 struct hl_inbound_pci_region inbound_region;
757 struct hl_outbound_pci_region outbound_region;
758 int rc;
759
760 if (hdev->asic_prop.iatu_done_by_fw)
761 return 0;
762
763 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
764 inbound_region.mode = PCI_BAR_MATCH_MODE;
765 inbound_region.bar = SRAM_BAR_ID;
766 inbound_region.addr = SRAM_BASE_ADDR;
767 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
768 if (rc)
769 goto done;
770
771 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
772 inbound_region.mode = PCI_BAR_MATCH_MODE;
773 inbound_region.bar = CFG_BAR_ID;
774 inbound_region.addr = SPI_FLASH_BASE_ADDR;
775 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
776 if (rc)
777 goto done;
778
779 /* Inbound Region 2 - Bar 4 - Point to HBM */
780 inbound_region.mode = PCI_BAR_MATCH_MODE;
781 inbound_region.bar = HBM_BAR_ID;
782 inbound_region.addr = DRAM_PHYS_BASE;
783 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
784 if (rc)
785 goto done;
786
787 /* Outbound Region 0 - Point to Host */
788 outbound_region.addr = HOST_PHYS_BASE;
789 outbound_region.size = HOST_PHYS_SIZE;
790 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
791
792 done:
793 return rc;
794 }
795
/* Read the current H/W state register (clean/dirty) from the device */
static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}
800
/*
 * gaudi_early_init - Early device initialization.
 *
 * Sets the fixed ASIC properties, sanity-checks the SRAM/CFG BAR sizes
 * (a mismatch means this is probably not a Gaudi), determines whether the
 * firmware owns the iATU configuration, brings up PCI and finally reads the
 * preboot status and resets the device if its H/W state is dirty.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	/* HBM BAR size varies between card flavors - take it from the device */
	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}
892
/*
 * gaudi_early_fini - Reverse of gaudi_early_init: release the queue
 * properties array and tear down the PCI resources.
 */
static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}
900
/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	/* Preferred path: ask the CPUCP firmware for the CPU PLL outputs */
	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		/* PLL info is fetched via the CPU queue; bail quietly if it is down */
		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		/* NOTE(review): output index 2 is taken as the timestamp clock -
		 * confirm against the CPUCP PLL output mapping
		 */
		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility: derive the frequency from the PLL registers */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			/* Clock is the reference clock, possibly divided */
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			/* Clock comes from the PLL: ref * (nf+1) / ((nr+1)*(od+1)) */
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	/* Cache the results; the raw PLL parameters are kept for debug/info */
	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}
963
/*
 * _gaudi_init_tpc_mem - DMA the TPC kernel image to SRAM and run it on all
 * TPC engines.
 *
 * @hdev: pointer to hl_device structure
 * @tpc_kernel_src_addr: DMA address of the host buffer holding the kernel
 * @tpc_kernel_size: size in bytes of the kernel image
 *
 * Builds a single LIN_DMA packet in a kernel CB, submits it on QMAN0 to copy
 * the image to SRAM, then triggers the kernel on every TPC engine.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	/* The CB holds exactly one LIN_DMA packet */
	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* Kernel CB doubles as the patched CB - no parsing is needed */
	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	/* Image now resides in SRAM - run it on each TPC engine in turn */
	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	/* hl_cb_destroy drops the idr reference taken by hl_cb_kernel_create */
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}
1040
1041 /*
1042 * gaudi_init_tpc_mem() - Initialize TPC memories.
1043 * @hdev: Pointer to hl_device structure.
1044 *
1045 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
1046 *
1047 * Return: 0 for success, negative value for error.
1048 */
gaudi_init_tpc_mem(struct hl_device * hdev)1049 static int gaudi_init_tpc_mem(struct hl_device *hdev)
1050 {
1051 const struct firmware *fw;
1052 size_t fw_size;
1053 void *cpu_addr;
1054 dma_addr_t dma_handle;
1055 int rc, count = 5;
1056
1057 again:
1058 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
1059 if (rc == -EINTR && count-- > 0) {
1060 msleep(50);
1061 goto again;
1062 }
1063
1064 if (rc) {
1065 dev_err(hdev->dev, "Failed to load firmware file %s\n",
1066 GAUDI_TPC_FW_FILE);
1067 goto out;
1068 }
1069
1070 fw_size = fw->size;
1071 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
1072 if (!cpu_addr) {
1073 dev_err(hdev->dev,
1074 "Failed to allocate %zu of dma memory for TPC kernel\n",
1075 fw_size);
1076 rc = -ENOMEM;
1077 goto out;
1078 }
1079
1080 memcpy(cpu_addr, fw->data, fw_size);
1081
1082 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
1083
1084 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);
1085
1086 out:
1087 release_firmware(fw);
1088 return rc;
1089 }
1090
/*
 * gaudi_collective_map_sobs - Map the currently-active SOB group of a stream
 * onto all collective slave queues: one SOB per NIC engine, plus a shared SOB
 * for the reduction engine queues (DMA5 and TPC7).
 */
static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;
	u32 base_sob, group_idx, qid, nic;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	group_idx = stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];
	base_sob = cprop->hw_sob_group[group_idx].base_sob_id;

	/* NIC queues of the same stream are spaced 4 apart */
	for (nic = 0 ; nic < NIC_NUMBER_OF_ENGINES ; nic++) {
		qid = GAUDI_QUEUE_ID_NIC_0_0 + stream + (4 * nic);
		hdev->kernel_queues[qid].sync_stream_prop.collective_sob_id =
				base_sob + nic;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine need to participate in the reduction process
	 */
	qid = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	hdev->kernel_queues[qid].sync_stream_prop.collective_sob_id =
			base_sob + NIC_NUMBER_OF_ENGINES;

	qid = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	hdev->kernel_queues[qid].sync_stream_prop.collective_sob_id =
			base_sob + NIC_NUMBER_OF_ENGINES;
}
1122
gaudi_sob_group_hw_reset(struct kref * ref)1123 static void gaudi_sob_group_hw_reset(struct kref *ref)
1124 {
1125 struct gaudi_hw_sob_group *hw_sob_group =
1126 container_of(ref, struct gaudi_hw_sob_group, kref);
1127 struct hl_device *hdev = hw_sob_group->hdev;
1128 int i;
1129
1130 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
1131 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
1132 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
1133
1134 kref_init(&hw_sob_group->kref);
1135 }
1136
gaudi_sob_group_reset_error(struct kref * ref)1137 static void gaudi_sob_group_reset_error(struct kref *ref)
1138 {
1139 struct gaudi_hw_sob_group *hw_sob_group =
1140 container_of(ref, struct gaudi_hw_sob_group, kref);
1141 struct hl_device *hdev = hw_sob_group->hdev;
1142
1143 dev_crit(hdev->dev,
1144 "SOB release shouldn't be called here, base_sob_id: %d\n",
1145 hw_sob_group->base_sob_id);
1146 }
1147
/*
 * gaudi_collective_mstr_sob_mask_set - Build the master monitor SOB masks:
 * one bit per enabled NIC engine plus one bit for the reduction engine.
 */
static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit.
	 * NOTE: this deliberately uses the loop's final value
	 * i == NIC_NUMBER_OF_ENGINES, i.e. the bit right after the last NIC's,
	 * which is the reduction engine's position in the mask. It is not a
	 * stray copy of the loop body.
	 */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
			BIT(i % HL_MAX_SOBS_PER_MONITOR);
}
1165
gaudi_collective_init(struct hl_device * hdev)1166 static int gaudi_collective_init(struct hl_device *hdev)
1167 {
1168 u32 i, sob_id, reserved_sobs_per_group;
1169 struct gaudi_collective_properties *prop;
1170 struct gaudi_device *gaudi;
1171
1172 gaudi = hdev->asic_specific;
1173 prop = &gaudi->collective_props;
1174 sob_id = hdev->asic_prop.collective_first_sob;
1175
1176 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
1177 reserved_sobs_per_group =
1178 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);
1179
1180 /* Init SOB groups */
1181 for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
1182 prop->hw_sob_group[i].hdev = hdev;
1183 prop->hw_sob_group[i].base_sob_id = sob_id;
1184 sob_id += reserved_sobs_per_group;
1185 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
1186 }
1187
1188 for (i = 0 ; i < QMAN_STREAMS; i++) {
1189 prop->next_sob_group_val[i] = 1;
1190 prop->curr_sob_group_idx[i] = 0;
1191 gaudi_collective_map_sobs(hdev, i);
1192 }
1193
1194 gaudi_collective_mstr_sob_mask_set(gaudi);
1195
1196 return 0;
1197 }
1198
/* Drop one reference to a SOB group; the last ref triggers its H/W reset */
static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	kref_put(&gaudi->collective_props.hw_sob_group[sob_group].kref,
			gaudi_sob_group_hw_reset);
}
1207
/*
 * gaudi_collective_master_init_job - Fill the master job's patched CB with
 * two wait sequences: the first monitor waits on the group's first SOB range
 * (masked by mstr_sob_mask[0]), the second on the next range (mask[1]).
 * Records this queue as the group's owner for later reset.
 */
static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	/* Remember which queue owns the group so its reset can be issued there */
	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	/* First wait: monitor 0 over the first SOB range of the group */
	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	/* Second wait: monitor 1 over the following SOB range */
	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	/* wait_prop keeps data/sob_val/q_idx from the first wait; size is the
	 * running CB offset so the second wait is appended after the first
	 */
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}
1258
/*
 * gaudi_collective_slave_init_job - Fill a slave job's CB with a wait on the
 * user's signal SOB followed by a signal on the slave's own collective SOB
 * (which the master monitors).
 */
static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle store earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	/* Signal packet is appended after the wait packets (offset cb_size) */
	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}
1306
/*
 * gaudi_collective_wait_init_cs - Initialize a collective-wait command
 * submission: propagate the signal SOB info to the wait completion object,
 * take the needed SOB references under the completion lock, build the
 * master/slave wait CBs for every job, and advance the stream's SOB group
 * with wraparound handling.
 *
 * Return: 0 on success, -EINVAL if the signal CS already completed.
 */
static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job depends on each
		 * job wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevent out of sync of hw_sob
	 * refcount value, changed by signal/wait flows.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	/* Build CBs: the master queue gets the dual-monitor wait, every other
	 * queue gets the slave wait+signal sequence
	 */
	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	/* Wait CS is in flight - drop our reference to the signal fence */
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
1413
/*
 * gaudi_get_patched_cb_extra_size - Extra bytes a patched CB needs on top of
 * the user CB: two MSG_PROT packets, padded out to the next device cache
 * line when the packets would otherwise cross the user CB's cache-line end.
 */
static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 line_end, extra;

	extra = sizeof(struct packet_msg_prot) * 2;
	line_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);

	return (user_cb_size + extra > line_end) ?
			(line_end - user_cb_size) + extra : extra;
}
1426
/*
 * gaudi_collective_wait_create_job - Allocate one job (master or slave) for a
 * collective wait CS, create its kernel CB sized for the packets that role
 * needs, and link the job into the CS job list.
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on allocation failure.
 */
static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since its guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}
1522
/*
 * gaudi_collective_wait_create_jobs - Build the full job list of a collective
 * wait CS: one master job on the wait queue, one slave job per enabled NIC
 * engine and one on the reduction engine (DMA5 or TPC7) queue of the same
 * stream.
 *
 * Return: 0 on success, negative error code otherwise.
 */
static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	collective_queue = (collective_engine_id == GAUDI_ENGINE_ID_DMA_5) ?
			GAUDI_QUEUE_ID_DMA_5_0 + stream :
			GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * Rest of the jobs goes to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
					HL_COLLECTIVE_MASTER, wait_queue_id,
					wait_queue_id, encaps_signal_offset);
			if (rc)
				return rc;
			continue;
		}

		if (nic_idx < NIC_NUMBER_OF_ENGINES) {
			bool nic_enabled = gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx);

			queue_id = nic_queue;
			nic_queue += 4;
			nic_idx++;

			/* Skip NIC engines that were not brought up */
			if (!nic_enabled)
				continue;
		} else {
			queue_id = collective_queue;
		}

		rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		if (rc)
			return rc;
	}

	return rc;
}
1607
/*
 * gaudi_late_init - Late initialization, after the firmware is up: fetch
 * cpucp info, disable unused NIC QMANs on PCI cards, enable PCI access from
 * the device, scrub memories, clear MMU page tables, load the TPC kernel,
 * init collective resources and prepare the MMU for the single user ASID.
 *
 * Return: 0 on success, negative error code otherwise. On failure after PCI
 * access was enabled, it is disabled again before returning.
 */
static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc)
		return rc;

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}
1687
/* Late teardown counterpart of gaudi_late_init - release hwmon resources */
static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}
1692
/*
 * gaudi_alloc_cpu_accessible_dma_mem - Allocate the host memory region the
 * device CPU can access, retrying until an allocation whose start and end
 * share the same PCI MSB bits is found (see constraint explanation below).
 *
 * Return: 0 on success, -ENOMEM/-EFAULT on failure.
 */
static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bits addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical in all allocated range.
	 */

	/* Keep every failed candidate allocated until a good one is found, so
	 * the allocator cannot hand the same region back on the next try
	 */
	for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
		virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
								&dma_addr_arr[i],
								GFP_KERNEL | __GFP_ZERO);
		if (!virt_addr_arr[i]) {
			rc = -ENOMEM;
			goto free_dma_mem_arr;
		}

		end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
		if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
				GAUDI_CPU_PCI_MSB_ADDR(end_addr))
			break;
	}

	if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
		dev_err(hdev->dev,
			"MSB of CPU accessible DMA memory are not identical in all range\n");
		rc = -EFAULT;
		goto free_dma_mem_arr;
	}

	hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
	hdev->cpu_accessible_dma_address = dma_addr_arr[i];
	hdev->cpu_pci_msb_addr =
		GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);

	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);

free_dma_mem_arr:
	/* Free only the rejected candidates (indices below i); on the success
	 * path the chosen allocation at index i is kept
	 */
	for (j = 0 ; j < i ; j++)
		hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
						dma_addr_arr[j]);

	return rc;
}
1745
gaudi_free_internal_qmans_pq_mem(struct hl_device * hdev)1746 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1747 {
1748 struct gaudi_device *gaudi = hdev->asic_specific;
1749 struct gaudi_internal_qman_info *q;
1750 u32 i;
1751
1752 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1753 q = &gaudi->internal_qmans[i];
1754 if (!q->pq_kernel_addr)
1755 continue;
1756 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr);
1757 }
1758 }
1759
gaudi_alloc_internal_qmans_pq_mem(struct hl_device * hdev)1760 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1761 {
1762 struct gaudi_device *gaudi = hdev->asic_specific;
1763 struct gaudi_internal_qman_info *q;
1764 int rc, i;
1765
1766 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1767 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1768 continue;
1769
1770 q = &gaudi->internal_qmans[i];
1771
1772 switch (i) {
1773 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1774 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1775 break;
1776 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1777 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1778 break;
1779 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1780 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1781 break;
1782 case GAUDI_QUEUE_ID_NIC_0_0 ... GAUDI_QUEUE_ID_NIC_9_3:
1783 q->pq_size = NIC_QMAN_SIZE_IN_BYTES;
1784 break;
1785 default:
1786 dev_err(hdev->dev, "Bad internal queue index %d", i);
1787 rc = -EINVAL;
1788 goto free_internal_qmans_pq_mem;
1789 }
1790
1791 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr,
1792 GFP_KERNEL | __GFP_ZERO);
1793 if (!q->pq_kernel_addr) {
1794 rc = -ENOMEM;
1795 goto free_internal_qmans_pq_mem;
1796 }
1797 }
1798
1799 return 0;
1800
1801 free_internal_qmans_pq_mem:
1802 gaudi_free_internal_qmans_pq_mem(hdev);
1803 return rc;
1804 }
1805
gaudi_set_pci_memory_regions(struct hl_device * hdev)1806 static void gaudi_set_pci_memory_regions(struct hl_device *hdev)
1807 {
1808 struct asic_fixed_properties *prop = &hdev->asic_prop;
1809 struct pci_mem_region *region;
1810
1811 /* CFG */
1812 region = &hdev->pci_mem_region[PCI_REGION_CFG];
1813 region->region_base = CFG_BASE;
1814 region->region_size = CFG_SIZE;
1815 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR;
1816 region->bar_size = CFG_BAR_SIZE;
1817 region->bar_id = CFG_BAR_ID;
1818 region->used = 1;
1819
1820 /* SRAM */
1821 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
1822 region->region_base = SRAM_BASE_ADDR;
1823 region->region_size = SRAM_SIZE;
1824 region->offset_in_bar = 0;
1825 region->bar_size = SRAM_BAR_SIZE;
1826 region->bar_id = SRAM_BAR_ID;
1827 region->used = 1;
1828
1829 /* DRAM */
1830 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
1831 region->region_base = DRAM_PHYS_BASE;
1832 region->region_size = hdev->asic_prop.dram_size;
1833 region->offset_in_bar = 0;
1834 region->bar_size = prop->dram_pci_bar_size;
1835 region->bar_id = HBM_BAR_ID;
1836 region->used = 1;
1837
1838 /* SP SRAM */
1839 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM];
1840 region->region_base = PSOC_SCRATCHPAD_ADDR;
1841 region->region_size = PSOC_SCRATCHPAD_SIZE;
1842 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR;
1843 region->bar_size = CFG_BAR_SIZE;
1844 region->bar_id = CFG_BAR_ID;
1845 region->used = 1;
1846 }
1847
/*
 * gaudi_sw_init() - software-only initialization of the Gaudi ASIC layer.
 *
 * Allocates the gaudi_device structure, builds the compact event-ID array
 * from the static IRQ map table, and creates the DMA pools and the
 * CPU-accessible DMA memory/gen_pool used for communication with the
 * device CPU. On error, resources are released in reverse order via the
 * goto-cleanup chain (mirrored by gaudi_sw_fini() on teardown).
 *
 * Return: 0 on success, negative errno on failure.
 */
static int gaudi_sw_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi;
	u32 i, event_id = 0;
	int rc;

	/* Allocate device structure */
	gaudi = kzalloc_obj(*gaudi);
	if (!gaudi)
		return -ENOMEM;

	/* Compact the valid entries of the IRQ map table into the events
	 * array; bail out if there are more valid entries than the array
	 * can hold.
	 */
	for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
		if (gaudi_irq_map_table[i].valid) {
			if (event_id == GAUDI_EVENT_SIZE) {
				dev_err(hdev->dev,
					"Event array exceeds the limit of %u events\n",
					GAUDI_EVENT_SIZE);
				rc = -EINVAL;
				goto free_gaudi_device;
			}

			gaudi->events[event_id++] =
					gaudi_irq_map_table[i].fc_id;
		}
	}

	gaudi->cpucp_info_get = gaudi_cpucp_info_get;

	hdev->asic_specific = gaudi;

	/* Create DMA pool for small allocations */
	hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
			&hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
	if (!hdev->dma_pool) {
		dev_err(hdev->dev, "failed to create DMA pool\n");
		rc = -ENOMEM;
		goto free_gaudi_device;
	}

	rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
	if (rc)
		goto free_dma_pool;

	/* gen_pool with 32-byte allocation granularity (ilog2(32) order) */
	hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
	if (!hdev->cpu_accessible_dma_pool) {
		dev_err(hdev->dev,
			"Failed to create CPU accessible DMA pool\n");
		rc = -ENOMEM;
		goto free_cpu_dma_mem;
	}

	rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
				(uintptr_t) hdev->cpu_accessible_dma_mem,
				HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to CPU accessible DMA pool\n");
		rc = -EFAULT;
		goto free_cpu_accessible_dma_pool;
	}

	rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
	if (rc)
		goto free_cpu_accessible_dma_pool;

	spin_lock_init(&gaudi->hw_queues_lock);

	hdev->supports_sync_stream = true;
	hdev->supports_coresight = true;
	hdev->supports_staged_submission = true;
	hdev->supports_wait_for_multi_cs = true;

	hdev->asic_funcs->set_pci_memory_regions(hdev);
	hdev->stream_master_qid_arr =
				hdev->asic_funcs->get_stream_master_qid_arr();
	hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;

	return 0;

free_cpu_accessible_dma_pool:
	gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_dma_mem:
	/* Undo the PCI->CPU address fix-up done by the allocator before
	 * freeing the coherent buffer (only applied when F/W security is
	 * disabled).
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);
	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);
free_dma_pool:
	dma_pool_destroy(hdev->dma_pool);
free_gaudi_device:
	kfree(gaudi);
	return rc;
}
1941
/*
 * gaudi_sw_fini() - release all software resources acquired by
 * gaudi_sw_init(), in reverse order of allocation.
 *
 * Return: always 0.
 */
static int gaudi_sw_fini(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	gaudi_free_internal_qmans_pq_mem(hdev);

	gen_pool_destroy(hdev->cpu_accessible_dma_pool);

	/* Restore the original PCI MSB of the CPU-accessible buffer address
	 * before freeing it; the address was re-mapped at allocation time
	 * when F/W security is disabled.
	 */
	if (!hdev->asic_prop.fw_security_enabled)
		GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
					hdev->cpu_pci_msb_addr);

	hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
					hdev->cpu_accessible_dma_address);

	dma_pool_destroy(hdev->dma_pool);

	kfree(gaudi);

	return 0;
}
1963
gaudi_irq_handler_single(int irq,void * arg)1964 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1965 {
1966 struct hl_device *hdev = arg;
1967 int i;
1968
1969 if (hdev->disabled)
1970 return IRQ_HANDLED;
1971
1972 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1973 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1974
1975 hl_irq_handler_eq(irq, &hdev->event_queue);
1976
1977 return IRQ_HANDLED;
1978 }
1979
1980 /*
1981 * For backward compatibility, new MSI interrupts should be set after the
1982 * existing CPU and NIC interrupts.
1983 */
static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
				bool cpu_eq)
{
	int vec;

	/* The CPU event queue is pinned to a fixed MSI index */
	if (cpu_eq && (nr != GAUDI_EVENT_QUEUE_MSI_IDX))
		dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
				GAUDI_EVENT_QUEUE_MSI_IDX);

	/* Indices below the CPU EQ index (and the CPU EQ itself) map 1:1;
	 * newer MSI interrupts are placed after the existing CPU and NIC
	 * vectors for backward compatibility.
	 */
	if ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || cpu_eq)
		vec = nr;
	else
		vec = nr + NIC_NUMBER_OF_ENGINES + 1;

	return pci_irq_vector(hdev->pdev, vec);
}
1998
gaudi_enable_msi_single(struct hl_device * hdev)1999 static int gaudi_enable_msi_single(struct hl_device *hdev)
2000 {
2001 int rc, irq;
2002
2003 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n");
2004
2005 irq = gaudi_pci_irq_vector(hdev, 0, false);
2006 rc = request_irq(irq, gaudi_irq_handler_single, 0,
2007 "gaudi single msi", hdev);
2008 if (rc)
2009 dev_err(hdev->dev,
2010 "Failed to request single MSI IRQ\n");
2011
2012 return rc;
2013 }
2014
gaudi_enable_msi(struct hl_device * hdev)2015 static int gaudi_enable_msi(struct hl_device *hdev)
2016 {
2017 struct gaudi_device *gaudi = hdev->asic_specific;
2018 int rc;
2019
2020 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
2021 return 0;
2022
2023 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI);
2024 if (rc < 0) {
2025 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
2026 return rc;
2027 }
2028
2029 rc = gaudi_enable_msi_single(hdev);
2030 if (rc)
2031 goto free_pci_irq_vectors;
2032
2033 gaudi->hw_cap_initialized |= HW_CAP_MSI;
2034
2035 return 0;
2036
2037 free_pci_irq_vectors:
2038 pci_free_irq_vectors(hdev->pdev);
2039 return rc;
2040 }
2041
gaudi_sync_irqs(struct hl_device * hdev)2042 static void gaudi_sync_irqs(struct hl_device *hdev)
2043 {
2044 struct gaudi_device *gaudi = hdev->asic_specific;
2045
2046 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2047 return;
2048
2049 /* Wait for all pending IRQs to be finished */
2050 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
2051 }
2052
gaudi_disable_msi(struct hl_device * hdev)2053 static void gaudi_disable_msi(struct hl_device *hdev)
2054 {
2055 struct gaudi_device *gaudi = hdev->asic_specific;
2056
2057 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
2058 return;
2059
2060 gaudi_sync_irqs(hdev);
2061 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
2062 pci_free_irq_vectors(hdev->pdev);
2063
2064 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
2065 }
2066
/*
 * gaudi_init_scrambler_sram() - enable SRAM scrambling on every router and
 * DMA interface that can access SRAM.
 *
 * Skipped when F/W security is enabled (driver cannot touch these
 * registers), when the F/W reports it already enabled SRAM scrambling, or
 * when the driver already did it (HW_CAP_SRAM_SCRAMBLER).
 */
static void gaudi_init_scrambler_sram(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
						CPU_BOOT_DEV_STS0_SRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
		return;

	/* North interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* South interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
			1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);

	/* DMA interface down-channels (E/W x N/S x CH0/CH1) */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
}
2134
/*
 * gaudi_init_scrambler_hbm() - enable HBM scrambling on every router and
 * DMA interface that can access HBM.
 *
 * Skipped when F/W security is enabled, when the boot-fit F/W reports it
 * already enabled DRAM scrambling, or when the driver already did it
 * (HW_CAP_HBM_SCRAMBLER).
 */
static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_DRAM_SCR_EN)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
		return;

	/* North interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* South interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
			1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);

	/* DMA interface down-channels (E/W x N/S x CH0/CH1) */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
			1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);

	gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
}
2202
/*
 * gaudi_init_e2e() - program the end-to-end credit sizes for HBM and PCI
 * traffic on every router/DMA interface, then enable E2E crediting.
 *
 * Skipped when F/W security is enabled or when the boot-fit F/W reports it
 * already configured the E2E credits. The per-register values are tuned
 * H/W constants; the ">> 3" forms express a byte count programmed in
 * 8-byte units. NOTE(review): the exact numeric tuning comes from the H/W
 * team - do not "normalize" these values.
 */
static void gaudi_init_e2e(struct hl_device *hdev)
{
	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_E2E_CRED_EN)
		return;

	/* Credit sizes - south interface routers 0-7 */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);

	/* Credit sizes - north interface routers 0-7 */
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);

	/* Credit sizes - DMA interface down-channels (same tuning for all) */
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);

	/* Enable E2E crediting only after all sizes are programmed */
	WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
			1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
			1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);

	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
			1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
	WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
			1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
}
2452
/*
 * gaudi_init_hbm_cred() - program read/write credit counts for HBM0/HBM1
 * on the four DMA interfaces, then enable read+write crediting.
 *
 * Skipped when F/W security is enabled or when the boot-fit F/W reports it
 * already configured the HBM credits. The credit patterns are tuned H/W
 * constants (one nibble per requestor - presumably; verify against the
 * H/W spec before changing).
 */
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;

	if (hdev->asic_prop.fw_security_enabled)
		return;

	if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 &
					CPU_BOOT_DEV_STS0_HBM_CRED_EN)
		return;

	hbm0_wr = 0x33333333;
	hbm0_rd = 0x77777777;
	hbm1_wr = 0x55555555;
	hbm1_rd = 0xDDDDDDDD;

	/* Same credit counts for all four DMA interfaces (E/W x N/S) */
	WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);

	WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
	WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
	WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
	WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);

	/* Enable both read and write crediting, HBM port 0 then port 1 */
	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));

	WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
	WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
			(1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
			(1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
}
2515
/*
 * gaudi_init_golden_registers() - one-time "golden" register setup:
 * E2E credits, HBM credits, per-TPC interrupt masking and I-cache fetch
 * width, SRAM header clearing and MME rollup counters.
 */
static void gaudi_init_golden_registers(struct hl_device *hdev)
{
	u32 tpc_offset;
	int tpc_id, i;

	gaudi_init_e2e(hdev);
	gaudi_init_hbm_cred(hdev);

	for (tpc_id = 0, tpc_offset = 0;
			tpc_id < TPC_NUMBER_OF_ENGINES;
			tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
		/* Mask all arithmetic interrupts from TPC */
		WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
		/* Set 16 cache lines */
		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
				ICACHE_FETCH_LINE_NUM, 2);
	}

	/* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
	for (i = 0 ; i < 128 ; i += 8)
		writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);

	WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
	WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
}
2543
/*
 * gaudi_init_pci_dma_qman() - configure one stream of a PCI DMA QMAN.
 *
 * @hdev:         habanalabs device structure
 * @dma_id:       index of the DMA engine (selects the QMAN register block)
 * @qman_id:      stream index within the QMAN (0-3)
 * @qman_pq_addr: DMA address of the stream's PQ buffer on the host
 *
 * Programs the PQ base/size/pointers, the LDMA offsets, the four message
 * base addresses used by sync-manager monitors/SOBs, and - once per QMAN
 * (for stream 0 only) - the RAZWI error reporting, arbitration error
 * messages, watchdog timeout and protection bits.
 */
static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, dma_addr_t qman_pq_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 q_off, dma_qm_offset;
	u32 dma_qm_err_cfg, irq_handler_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* Split the E_N monitor-payload / SOB and W_S monitor-payload / SOB
	 * sync-manager addresses into low/high halves for the MSG_BASE regs.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Per-stream registers are 4 bytes apart within the QMAN block */
	q_off = dma_qm_offset + qman_id * 4;

	WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
	WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));

	/* PQ size is programmed as log2 of the number of entries */
	WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
	WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
	WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

	WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
	WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100);

	/* The following configuration is needed only once per QMAN */
	if (qman_id == 0) {
		/* IRQ destination: GIC when GIC interrupts are enabled,
		 * otherwise the F/W-provided dynamic register.
		 */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error write-data encodes the engine via event cpu_id+dma_id */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_EXTERNAL_MAKE_TRUSTED);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
	}
}
2634
/*
 * gaudi_init_dma_core() - configure and enable one DMA core.
 *
 * Sets read outstanding/size limits, applies the H3-2116 workaround,
 * configures RAZWI error reporting (optionally stopping on error), puts
 * the secured channel in MMU-bypass mode and finally enables the core.
 */
static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
	u32 dma_offset = dma_id * DMA_CORE_OFFSET;
	u32 irq_handler_offset;

	/* Set to maximum possible according to physical size
	 * (0 means "no explicit limit" here).
	 */
	WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
	WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);

	/* WA for H/W bug H3-2116 */
	WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15);

	/* STOP_ON bit implies no completion to operation in case of RAZWI */
	if (hdev->stop_on_err)
		dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;

	WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);

	/* IRQ destination: GIC when GIC interrupts are enabled, otherwise
	 * the F/W-provided dynamic register.
	 */
	irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);

	WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
		lower_32_bits(CFG_BASE + irq_handler_offset));
	WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
		upper_32_bits(CFG_BASE + irq_handler_offset));

	/* Error write-data encodes the engine via event cpu_id+dma_id */
	WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
		gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
	/* If the channel is secured, it should be in MMU bypass mode */
	WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
			1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
	WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
}
2674
/*
 * gaudi_enable_qman() - write an enable mask to a DMA QMAN's global config.
 * @hdev: habanalabs device structure.
 * @dma_id: DMA channel index, selects the QMAN register block.
 * @enable_mask: value written to GLBL_CFG0 of that QMAN.
 */
static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
				u32 enable_mask)
{
	WREG32(mmDMA0_QM_GLBL_CFG0 + dma_id * DMA_QMAN_OFFSET, enable_mask);
}
2682
/*
 * gaudi_init_pci_dma_qmans() - initialize all PCI DMA QMANs and their cores.
 * @hdev: habanalabs device structure.
 *
 * For every PCI DMA channel: assign a CQ id and MSI vector to each of its
 * four stream queues, program the QMAN streams, configure the DMA core and
 * enable the QMAN. No-op if HW_CAP_PCI_DMA is already set; sets it on exit.
 */
static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_hw_queue *q;
	int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;

	if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
		return;

	for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		/*
		 * For queues after the CPU Q need to add 1 to get the correct
		 * queue. In addition, need to add the CPU EQ and NIC IRQs in
		 * order to get the correct MSI register.
		 */
		if (dma_id > 1) {
			cpu_skip = 1;
			nic_skip = NIC_NUMBER_OF_ENGINES;
		} else {
			cpu_skip = 0;
			nic_skip = 0;
		}

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			/* Kernel queue index: 4 streams per channel, shifted
			 * by one past the CPU queue when applicable.
			 */
			q_idx = 4 * dma_id + j + cpu_skip;
			q = &hdev->kernel_queues[q_idx];
			q->cq_id = cq_id++;
			q->msi_vec = nic_skip + cpu_skip + msi_vec++;
			gaudi_init_pci_dma_qman(hdev, dma_id, j,
						q->bus_address);
		}

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
}
2723
/*
 * gaudi_init_hbm_dma_qman() - configure one stream of an HBM DMA QMAN.
 * @hdev: habanalabs device structure.
 * @dma_id: DMA channel index, used to offset into the QMAN register block.
 * @qman_id: stream index; 0-3 are the upper CP streams with a PQ, 4 is the
 *           lower CP (see caller, which passes 4 with a zero base address).
 * @qman_base_addr: PQ base address for streams 0-3 (unused for the lower CP).
 *
 * Streams 0-3 get their PQ base/size/pointers and CPDMA offsets. The lower
 * CP (qman_id >= 4) gets LDMA offsets plus the QMAN-wide error/RAZWI IRQ
 * configuration, arbiter error reporting and watchdog timeout. All streams
 * get the sync-manager monitor/SOB message base addresses; the west-south
 * pair is programmed only for the DMA5 engine (sync stream collective).
 */
static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 dma_qm_err_cfg, irq_handler_offset;
	u32 q_off, dma_qm_offset;

	dma_qm_offset = dma_id * DMA_QMAN_OFFSET;

	/* Split the 64-bit sync manager addresses (east-north and
	 * west-south blocks) into the 32-bit halves the registers take.
	 */
	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN block */
	q_off = dma_qm_offset + qman_id * 4;

	if (qman_id < 4) {
		/* Upper CP stream: program its PQ and CPDMA offsets */
		WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
		WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_CPDMA_DST_OFFSET);
	} else {
		/* Lower CP: LDMA offsets plus QMAN-wide error configuration */
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);

		WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
							QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_SRC_OFFSET);
		WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
							QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			dma_qm_err_cfg |=
				HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);

		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload: base QM event id + channel index */
		WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
			gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
									dma_id);

		WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
		WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */
	if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) {
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
2828
/*
 * gaudi_init_hbm_dma_qmans() - initialize all HBM DMA QMANs and cores.
 * @hdev: habanalabs device structure.
 *
 * For every HBM DMA channel, program the four upper-CP streams from the
 * pre-allocated internal PQs, init the lower CP (stream 4, no PQ), set up
 * the DMA core and enable the QMAN. No-op if HW_CAP_HBM_DMA is already
 * set; sets it on exit.
 */
static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	int i, j, dma_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
		return;

	for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];

		for (j = 0 ; j < QMAN_STREAMS ; j++) {
			 /*
			  * Add the CPU queue in order to get the correct queue
			  * number as all internal queue are placed after it
			  */
			internal_q_index = dma_id * QMAN_STREAMS + j + 1;

			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_hbm_dma_qman(hdev, dma_id, j,
						qman_base_addr);
		}

		/* Initializing lower CP for HBM DMA QMAN */
		gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);

		gaudi_init_dma_core(hdev, dma_id);

		gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
	}

	gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
}
2865
/*
 * gaudi_init_mme_qman() - configure one stream of an MME QMAN.
 * @hdev: habanalabs device structure.
 * @mme_offset: register offset of the target MME QMAN block relative to MME0.
 * @qman_id: stream index; 0-3 are the upper CP streams, 4 is the lower CP.
 * @qman_base_addr: PQ base address for streams 0-3 (unused for the lower CP).
 *
 * Streams 0-3 get PQ base/size/pointers and CPDMA offsets; the lower CP
 * gets LDMA offsets and the QMAN-wide error/RAZWI IRQ configuration.
 * All streams get the east-north sync manager message base addresses.
 */
static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_lo, mtr_base_hi;
	u32 so_base_lo, so_base_hi;
	u32 irq_handler_offset;
	u32 q_off, mme_id;
	u32 mme_qm_err_cfg;

	mtr_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN block */
	q_off = mme_offset + qman_id * 4;

	if (qman_id < 4) {
		WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
		WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
					QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
					QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
					QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);

		WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
					QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
					QMAN_LDMA_SRC_OFFSET);
		WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
					QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ.
		 * Divide by 2: two QMAN blocks per MME pair share one event id
		 * (presumably - TODO confirm against the event map).
		 */
		mme_id = mme_offset /
				(mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2;

		mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			mme_qm_err_cfg |=
				MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);

		WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
			gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
									mme_id);

		WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
		WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
	WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
}
2952
/*
 * gaudi_init_mme_qmans() - initialize both MME QMANs.
 * @hdev: habanalabs device structure.
 *
 * Programs the upper-CP streams of MME2 then MME0 from the internal PQs,
 * initializes the lower CP of each, and enables both QMANs. No-op if
 * HW_CAP_MME is already set; sets it on exit.
 */
static void gaudi_init_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 mme_offset;
	int i, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_MME)
		return;

	/*
	 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
	 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
	 */

	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;

	for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
		internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
		q = &gaudi->internal_qmans[internal_q_index];
		qman_base_addr = (u64) q->pq_dma_addr;
		/* i & 0x3 selects the stream; after the first 4 streams
		 * switch from the MME2 block to the MME0 block.
		 */
		gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
					qman_base_addr);
		if (i == 3)
			mme_offset = 0;
	}

	/* Initializing lower CP for MME QMANs */
	mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
	gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
	gaudi_init_mme_qman(hdev, 0, 4, 0);

	WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
	WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);

	gaudi->hw_cap_initialized |= HW_CAP_MME;
}
2991
/*
 * gaudi_init_tpc_qman() - configure one stream of a TPC QMAN.
 * @hdev: habanalabs device structure.
 * @tpc_offset: register offset of the target TPC QMAN block relative to TPC0.
 * @qman_id: stream index; 0-3 are the upper CP streams, 4 is the lower CP.
 * @qman_base_addr: PQ base address for streams 0-3 (unused for the lower CP).
 *
 * Streams 0-3 get PQ base/size/pointers and CPDMA offsets; the lower CP
 * gets LDMA offsets and the QMAN-wide error/RAZWI IRQ configuration.
 * All streams get the east-north sync manager message base addresses;
 * the west-south pair is programmed only for tpc_id 6 (the TPC used for
 * the sync stream collective - see comment below).
 */
static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
				int qman_id, u64 qman_base_addr)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 tpc_qm_err_cfg, irq_handler_offset;
	u32 q_off, tpc_id;

	mtr_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN block */
	q_off = tpc_offset + qman_id * 4;

	/* Recover the zero-based TPC index from the register offset */
	tpc_id = tpc_offset /
			(mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);

	if (qman_id < 4) {
		WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
					lower_32_bits(qman_base_addr));
		WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
					upper_32_bits(qman_base_addr));

		WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
		WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
		WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
						QMAN_CPDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
						QMAN_CPDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
						QMAN_CPDMA_DST_OFFSET);
	} else {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);

		WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off,
						QMAN_LDMA_SIZE_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_SRC_OFFSET);
		WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_DST_OFFSET);

		/* Configure RAZWI IRQ */
		tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			tpc_qm_err_cfg |=
				TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);

		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload: base QM event id + TPC index */
		WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
			gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
									tpc_id);

		WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
		WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}

	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */
	if (tpc_id == 6) {
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off,
				mtr_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off,
				mtr_base_ws_hi);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off,
				so_base_ws_lo);
		WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off,
				so_base_ws_hi);
	}
}
3097
/*
 * gaudi_init_tpc_qmans() - initialize the QMANs of all TPC engines.
 * @hdev: habanalabs device structure.
 *
 * For each TPC: program the four upper-CP streams from the internal PQs,
 * init the lower CP, enable the QMAN/TPC channel and set the engine's
 * sync-manager base address. Marks each TPC in hw_cap_initialized as it
 * completes. No-op if any TPC capability bit is already set.
 */
static void gaudi_init_tpc_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 so_base_hi, tpc_offset = 0;
	u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
			mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
	int i, tpc_id, internal_q_index;

	if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
		return;

	so_base_hi = upper_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
						tpc_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_tpc_qman(hdev, tpc_offset, i,
						qman_base_addr);

			if (i == 3) {
				/* Initializing lower CP for TPC QMAN */
				gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);

				/* Enable the QMAN and TPC channel */
				WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
						QMAN_TPC_ENABLE);
			}
		}

		WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
				so_base_hi);

		/* Advance to the next TPC's QMAN register block */
		tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;

		gaudi->hw_cap_initialized |=
				FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id);
	}
}
3142
/*
 * gaudi_init_nic_qman() - configure one stream of a NIC QMAN.
 * @hdev: habanalabs device structure.
 * @nic_offset: register offset of the target NIC QMAN block relative to
 *              NIC0 QM0.
 * @qman_id: stream index (0-3); unlike DMA/MME/TPC, every NIC stream has
 *           a PQ and only stream 0 carries the QMAN-wide error config.
 * @qman_base_addr: PQ base address of this stream.
 * @nic_id: NIC engine index, used in the error payload.
 *
 * Note: the low halves of the sync manager addresses are computed from
 * (CFG_BASE & U32_MAX) - i.e. only the low 32 bits of the base are used
 * for the LO registers here, unlike the other QMAN init functions.
 */
static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset,
				int qman_id, u64 qman_base_addr, int nic_id)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
	u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
	u32 nic_qm_err_cfg, irq_handler_offset;
	u32 q_off;

	mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_en_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
	mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	mtr_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
	so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
	so_base_ws_hi = upper_32_bits(CFG_BASE +
			mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);

	/* Each stream's registers are 4 bytes apart within the QMAN block */
	q_off = nic_offset + qman_id * 4;

	WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr));
	WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr));

	WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH));
	WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0);
	WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0);

	WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off,
						QMAN_LDMA_SIZE_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_SRC_OFFSET);
	WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off,
						QMAN_LDMA_DST_OFFSET);

	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);

	/* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
	WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);

	/* The following configuration is done only once per QMAN */
	if (qman_id == 0) {
		irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
				mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
				le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);

		/* Configure RAZWI IRQ */
		nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
		if (hdev->stop_on_err)
			nic_qm_err_cfg |=
				NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;

		WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg);

		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset,
			lower_32_bits(CFG_BASE + irq_handler_offset));
		WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset,
			upper_32_bits(CFG_BASE + irq_handler_offset));

		/* Error payload: base QM event id + NIC index */
		WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset,
			gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id +
									nic_id);

		WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset,
				QM_ARB_ERR_MSG_EN_MASK);

		/* Set timeout to maximum */
		WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT);

		WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0);
		WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset,
				QMAN_INTERNAL_MAKE_TRUSTED);
	}
}
3230
/*
 * gaudi_init_nic_qmans() - initialize the QMANs of all enabled NIC engines.
 * @hdev: habanalabs device structure.
 *
 * Walks all NIC engines; engines masked out of hdev->nic_ports_mask are
 * skipped (while still advancing the register offset). Each NIC macro
 * hosts two QMAN blocks, hence the offset correction on odd nic_id.
 * Sets the per-NIC capability bit as each QMAN is enabled.
 */
static void gaudi_init_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *q;
	u64 qman_base_addr;
	u32 nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int i, nic_id, internal_q_index;

	if (!hdev->nic_ports_mask)
		return;

	if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK)
		return;

	dev_dbg(hdev->dev, "Initializing NIC QMANs\n");

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		if (!(hdev->nic_ports_mask & (1 << nic_id))) {
			/* Port disabled - still step the offset forward */
			nic_offset += nic_delta_between_qmans;
			if (nic_id & 1) {
				nic_offset -= (nic_delta_between_qmans * 2);
				nic_offset += nic_delta_between_nics;
			}
			continue;
		}

		for (i = 0 ; i < QMAN_STREAMS ; i++) {
			internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 +
						nic_id * QMAN_STREAMS + i;
			q = &gaudi->internal_qmans[internal_q_index];
			qman_base_addr = (u64) q->pq_dma_addr;
			gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3),
						qman_base_addr, nic_id);
		}

		/* Enable the QMAN */
		WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE);

		/* After an odd (second) QMAN of a NIC macro, jump back and
		 * over to the next NIC macro's first QMAN.
		 */
		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}

		gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id);
	}
}
3282
/*
 * gaudi_disable_pci_dma_qmans() - disable the PCI DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * Clears GLBL_CFG0 of the PCI DMA QMANs (channels 0, 1 and 5). No-op if
 * the PCI DMA engines were never initialized.
 */
static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_QM_GLBL_CFG0, 0);
	WREG32(mmDMA1_QM_GLBL_CFG0, 0);
	WREG32(mmDMA5_QM_GLBL_CFG0, 0);
}
3294
/*
 * gaudi_disable_hbm_dma_qmans() - disable the HBM DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * Clears GLBL_CFG0 of the HBM DMA QMANs (channels 2, 3, 4, 6 and 7).
 * No-op if the HBM DMA engines were never initialized.
 */
static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_QM_GLBL_CFG0, 0);
	WREG32(mmDMA3_QM_GLBL_CFG0, 0);
	WREG32(mmDMA4_QM_GLBL_CFG0, 0);
	WREG32(mmDMA6_QM_GLBL_CFG0, 0);
	WREG32(mmDMA7_QM_GLBL_CFG0, 0);
}
3308
/*
 * gaudi_disable_mme_qmans() - disable both MME QMANs.
 * @hdev: habanalabs device structure.
 *
 * Clears GLBL_CFG0 of the MME2 and MME0 QMANs. No-op if the MME engines
 * were never initialized.
 */
static void gaudi_disable_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	WREG32(mmMME2_QM_GLBL_CFG0, 0);
	WREG32(mmMME0_QM_GLBL_CFG0, 0);
}
3319
gaudi_disable_tpc_qmans(struct hl_device * hdev)3320 static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
3321 {
3322 struct gaudi_device *gaudi = hdev->asic_specific;
3323 u32 tpc_offset = 0;
3324 int tpc_id;
3325
3326 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3327 return;
3328
3329 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
3330 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
3331 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
3332 }
3333 }
3334
/*
 * gaudi_disable_nic_qmans() - disable the QMANs of all initialized NICs.
 * @hdev: habanalabs device structure.
 *
 * Clears GLBL_CFG0 of each NIC QMAN whose capability bit is set. The
 * register offset walks two QMAN blocks per NIC macro, hence the
 * correction on odd nic_id.
 */
static void gaudi_disable_nic_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 nic_mask, nic_offset = 0;
	u32 nic_delta_between_qmans =
			mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	u32 nic_delta_between_nics =
			mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0;
	int nic_id;

	for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) {
		nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id);

		if (gaudi->hw_cap_initialized & nic_mask)
			WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0);

		nic_offset += nic_delta_between_qmans;
		if (nic_id & 1) {
			nic_offset -= (nic_delta_between_qmans * 2);
			nic_offset += nic_delta_between_nics;
		}
	}
}
3358
/*
 * gaudi_stop_pci_dma_qmans() - stop the upper CPs of the PCI DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * Sets the CP_STOP bits (lower nibble, i.e. the four upper-CP streams)
 * in GLBL_CFG1 of DMA channels 0, 1 and 5. No-op if the PCI DMA engines
 * were never initialized.
 */
static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	/* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
	WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
3371
/*
 * gaudi_stop_hbm_dma_qmans() - stop all CPs of the HBM DMA QMANs.
 * @hdev: habanalabs device structure.
 *
 * Sets all five CP_STOP bits (0x1F: four upper CPs plus the lower CP) in
 * GLBL_CFG1 of DMA channels 2, 3, 4, 6 and 7. No-op if the HBM DMA
 * engines were never initialized.
 */
static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	/* Stop CPs of HBM DMA QMANs */

	WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
3387
/*
 * gaudi_stop_mme_qmans() - stop all CPs of the MME QMANs.
 * @hdev: habanalabs device structure.
 *
 * Sets all five CP_STOP bits in GLBL_CFG1 of the MME2 and MME0 QMANs.
 * No-op if the MME engines were never initialized.
 */
static void gaudi_stop_mme_qmans(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
		return;

	/* Stop CPs of MME QMANs */
	WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
	WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
}
3399
gaudi_stop_tpc_qmans(struct hl_device * hdev)3400 static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
3401 {
3402 struct gaudi_device *gaudi = hdev->asic_specific;
3403
3404 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3405 return;
3406
3407 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3408 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3409 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3410 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3411 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3412 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3413 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3414 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
3415 }
3416
gaudi_stop_nic_qmans(struct hl_device * hdev)3417 static void gaudi_stop_nic_qmans(struct hl_device *hdev)
3418 {
3419 struct gaudi_device *gaudi = hdev->asic_specific;
3420
3421 /* Stop upper CPs of QMANs */
3422
3423 if (gaudi->hw_cap_initialized & HW_CAP_NIC0)
3424 WREG32(mmNIC0_QM0_GLBL_CFG1,
3425 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3426 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3427 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3428
3429 if (gaudi->hw_cap_initialized & HW_CAP_NIC1)
3430 WREG32(mmNIC0_QM1_GLBL_CFG1,
3431 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3432 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3433 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3434
3435 if (gaudi->hw_cap_initialized & HW_CAP_NIC2)
3436 WREG32(mmNIC1_QM0_GLBL_CFG1,
3437 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3438 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3439 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3440
3441 if (gaudi->hw_cap_initialized & HW_CAP_NIC3)
3442 WREG32(mmNIC1_QM1_GLBL_CFG1,
3443 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3444 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3445 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3446
3447 if (gaudi->hw_cap_initialized & HW_CAP_NIC4)
3448 WREG32(mmNIC2_QM0_GLBL_CFG1,
3449 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3450 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3451 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3452
3453 if (gaudi->hw_cap_initialized & HW_CAP_NIC5)
3454 WREG32(mmNIC2_QM1_GLBL_CFG1,
3455 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3456 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3457 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3458
3459 if (gaudi->hw_cap_initialized & HW_CAP_NIC6)
3460 WREG32(mmNIC3_QM0_GLBL_CFG1,
3461 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3462 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3463 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3464
3465 if (gaudi->hw_cap_initialized & HW_CAP_NIC7)
3466 WREG32(mmNIC3_QM1_GLBL_CFG1,
3467 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3468 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3469 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3470
3471 if (gaudi->hw_cap_initialized & HW_CAP_NIC8)
3472 WREG32(mmNIC4_QM0_GLBL_CFG1,
3473 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3474 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3475 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3476
3477 if (gaudi->hw_cap_initialized & HW_CAP_NIC9)
3478 WREG32(mmNIC4_QM1_GLBL_CFG1,
3479 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
3480 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
3481 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);
3482 }
3483
/*
 * gaudi_pci_dma_stall() - halt the PCI DMA core engines.
 * @hdev: habanalabs device structure.
 *
 * Sets the HALT bit in CFG_1 of DMA cores 0, 1 and 5. No-op if the PCI
 * DMA engines were never initialized.
 */
static void gaudi_pci_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
		return;

	WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}
3495
/*
 * gaudi_hbm_dma_stall() - halt the HBM DMA core engines.
 * @hdev: habanalabs device structure.
 *
 * Sets the HALT bit in CFG_1 of DMA cores 2, 3, 4, 6 and 7. No-op if the
 * HBM DMA engines were never initialized.
 */
static void gaudi_hbm_dma_stall(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
		return;

	WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
	WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
}
3509
gaudi_mme_stall(struct hl_device * hdev)3510 static void gaudi_mme_stall(struct hl_device *hdev)
3511 {
3512 struct gaudi_device *gaudi = hdev->asic_specific;
3513
3514 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
3515 return;
3516
3517 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
3518 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3519 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3520 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3521 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3522 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3523 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3524 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3525 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3526 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3527 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3528 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3529 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3530 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3531 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
3532 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3533 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
3534 }
3535
gaudi_tpc_stall(struct hl_device * hdev)3536 static void gaudi_tpc_stall(struct hl_device *hdev)
3537 {
3538 struct gaudi_device *gaudi = hdev->asic_specific;
3539
3540 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
3541 return;
3542
3543 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3544 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3545 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3546 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3547 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3548 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3549 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3550 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
3551 }
3552
gaudi_disable_clock_gating(struct hl_device * hdev)3553 static void gaudi_disable_clock_gating(struct hl_device *hdev)
3554 {
3555 u32 qman_offset;
3556 int i;
3557
3558 if (hdev->asic_prop.fw_security_enabled)
3559 return;
3560
3561 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
3562 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
3563 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
3564
3565 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
3566 }
3567
3568 WREG32(mmMME0_QM_CGM_CFG, 0);
3569 WREG32(mmMME0_QM_CGM_CFG1, 0);
3570 WREG32(mmMME2_QM_CGM_CFG, 0);
3571 WREG32(mmMME2_QM_CGM_CFG1, 0);
3572
3573 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
3574 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
3575 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
3576
3577 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
3578 }
3579 }
3580
gaudi_enable_timestamp(struct hl_device * hdev)3581 static void gaudi_enable_timestamp(struct hl_device *hdev)
3582 {
3583 /* Disable the timestamp counter */
3584 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3585
3586 /* Zero the lower/upper parts of the 64-bit counter */
3587 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
3589
3590 /* Enable the counter */
3591 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
3592 }
3593
gaudi_disable_timestamp(struct hl_device * hdev)3594 static void gaudi_disable_timestamp(struct hl_device *hdev)
3595 {
3596 /* Disable the timestamp counter */
3597 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
3598 }
3599
gaudi_halt_engines(struct hl_device * hdev,bool hard_reset,bool fw_reset)3600 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
3601 {
3602 u32 wait_timeout_ms;
3603
3604 if (hdev->pldm)
3605 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
3606 else
3607 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
3608
3609 if (fw_reset)
3610 goto skip_engines;
3611
3612 gaudi_stop_nic_qmans(hdev);
3613 gaudi_stop_mme_qmans(hdev);
3614 gaudi_stop_tpc_qmans(hdev);
3615 gaudi_stop_hbm_dma_qmans(hdev);
3616 gaudi_stop_pci_dma_qmans(hdev);
3617
3618 msleep(wait_timeout_ms);
3619
3620 gaudi_pci_dma_stall(hdev);
3621 gaudi_hbm_dma_stall(hdev);
3622 gaudi_tpc_stall(hdev);
3623 gaudi_mme_stall(hdev);
3624
3625 msleep(wait_timeout_ms);
3626
3627 gaudi_disable_nic_qmans(hdev);
3628 gaudi_disable_mme_qmans(hdev);
3629 gaudi_disable_tpc_qmans(hdev);
3630 gaudi_disable_hbm_dma_qmans(hdev);
3631 gaudi_disable_pci_dma_qmans(hdev);
3632
3633 gaudi_disable_timestamp(hdev);
3634
3635 skip_engines:
3636 gaudi_disable_msi(hdev);
3637 }
3638
gaudi_mmu_init(struct hl_device * hdev)3639 static int gaudi_mmu_init(struct hl_device *hdev)
3640 {
3641 struct asic_fixed_properties *prop = &hdev->asic_prop;
3642 struct gaudi_device *gaudi = hdev->asic_specific;
3643 u64 hop0_addr;
3644 int rc, i;
3645
3646 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
3647 return 0;
3648
3649 for (i = 0 ; i < prop->max_asid ; i++) {
3650 hop0_addr = prop->mmu_pgt_addr +
3651 (i * prop->dmmu.hop_table_size);
3652
3653 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
3654 if (rc) {
3655 dev_err(hdev->dev,
3656 "failed to set hop0 addr for asid %d\n", i);
3657 return rc;
3658 }
3659 }
3660
3661 /* init MMU cache manage page */
3662 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8);
3663 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40);
3664
3665 /* mem cache invalidation */
3666 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
3667
3668 rc = hl_mmu_invalidate_cache(hdev, true, 0);
3669 if (rc)
3670 return rc;
3671
3672 WREG32(mmMMU_UP_MMU_ENABLE, 1);
3673 WREG32(mmMMU_UP_SPI_MASK, 0xF);
3674
3675 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440);
3676
3677 /*
3678 * The H/W expects the first PI after init to be 1. After wraparound
3679 * we'll write 0.
3680 */
3681 gaudi->mmu_cache_inv_pi = 1;
3682
3683 gaudi->hw_cap_initialized |= HW_CAP_MMU;
3684
3685 return 0;
3686 }
3687
gaudi_load_firmware_to_device(struct hl_device * hdev)3688 static int gaudi_load_firmware_to_device(struct hl_device *hdev)
3689 {
3690 void __iomem *dst;
3691
3692 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3693
3694 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0);
3695 }
3696
gaudi_load_boot_fit_to_device(struct hl_device * hdev)3697 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3698 {
3699 void __iomem *dst;
3700
3701 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3702
3703 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0);
3704 }
3705
gaudi_init_dynamic_firmware_loader(struct hl_device * hdev)3706 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev)
3707 {
3708 struct dynamic_fw_load_mgr *dynamic_loader;
3709 struct cpu_dyn_regs *dyn_regs;
3710
3711 dynamic_loader = &hdev->fw_loader.dynamic_loader;
3712
3713 /*
3714 * here we update initial values for few specific dynamic regs (as
3715 * before reading the first descriptor from FW those value has to be
3716 * hard-coded) in later stages of the protocol those values will be
3717 * updated automatically by reading the FW descriptor so data there
3718 * will always be up-to-date
3719 */
3720 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
3721 dyn_regs->kmd_msg_to_cpu =
3722 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
3723 dyn_regs->cpu_cmd_status_to_host =
3724 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
3725
3726 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC;
3727 }
3728
gaudi_init_static_firmware_loader(struct hl_device * hdev)3729 static void gaudi_init_static_firmware_loader(struct hl_device *hdev)
3730 {
3731 struct static_fw_load_mgr *static_loader;
3732
3733 static_loader = &hdev->fw_loader.static_loader;
3734
3735 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3736 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN;
3737 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU;
3738 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST;
3739 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3740 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0;
3741 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1;
3742 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0;
3743 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1;
3744 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET;
3745 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET;
3746 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR));
3747 static_loader->cpu_reset_wait_msec = hdev->pldm ?
3748 GAUDI_PLDM_RESET_WAIT_MSEC :
3749 GAUDI_CPU_RESET_WAIT_MSEC;
3750 }
3751
gaudi_init_firmware_preload_params(struct hl_device * hdev)3752 static void gaudi_init_firmware_preload_params(struct hl_device *hdev)
3753 {
3754 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
3755
3756 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
3757 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
3758 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
3759 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
3760 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
3761 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3762 }
3763
gaudi_init_firmware_loader(struct hl_device * hdev)3764 static void gaudi_init_firmware_loader(struct hl_device *hdev)
3765 {
3766 struct asic_fixed_properties *prop = &hdev->asic_prop;
3767 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
3768
3769 /* fill common fields */
3770 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
3771 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE;
3772 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE;
3773 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC;
3774 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC;
3775 fw_loader->skip_bmc = !hdev->bmc_enable;
3776 fw_loader->sram_bar_id = SRAM_BAR_ID;
3777 fw_loader->dram_bar_id = HBM_BAR_ID;
3778
3779 if (prop->dynamic_fw_load)
3780 gaudi_init_dynamic_firmware_loader(hdev);
3781 else
3782 gaudi_init_static_firmware_loader(hdev);
3783 }
3784
gaudi_init_cpu(struct hl_device * hdev)3785 static int gaudi_init_cpu(struct hl_device *hdev)
3786 {
3787 struct gaudi_device *gaudi = hdev->asic_specific;
3788 int rc;
3789
3790 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
3791 return 0;
3792
3793 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3794 return 0;
3795
3796 /*
3797 * The device CPU works with 40 bits addresses.
3798 * This register sets the extension to 50 bits.
3799 */
3800 if (!hdev->asic_prop.fw_security_enabled)
3801 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3802
3803 rc = hl_fw_init_cpu(hdev);
3804
3805 if (rc)
3806 return rc;
3807
3808 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3809
3810 return 0;
3811 }
3812
gaudi_init_cpu_queues(struct hl_device * hdev,u32 cpu_timeout)3813 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3814 {
3815 struct cpu_dyn_regs *dyn_regs =
3816 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3817 struct asic_fixed_properties *prop = &hdev->asic_prop;
3818 struct gaudi_device *gaudi = hdev->asic_specific;
3819 u32 status, irq_handler_offset;
3820 struct hl_eq *eq;
3821 struct hl_hw_queue *cpu_pq =
3822 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3823 int err;
3824
3825 if (!hdev->cpu_queues_enable)
3826 return 0;
3827
3828 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3829 return 0;
3830
3831 eq = &hdev->event_queue;
3832
3833 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3834 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3835
3836 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3837 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3838
3839 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3840 lower_32_bits(hdev->cpu_accessible_dma_address));
3841 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3842 upper_32_bits(hdev->cpu_accessible_dma_address));
3843
3844 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3845 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3846 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3847
3848 /* Used for EQ CI */
3849 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3850
3851 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3852
3853 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3854
3855 irq_handler_offset = prop->gic_interrupts_enable ?
3856 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
3857 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
3858
3859 WREG32(irq_handler_offset,
3860 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
3861
3862 err = hl_poll_timeout(
3863 hdev,
3864 mmCPU_IF_QUEUE_INIT,
3865 status,
3866 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3867 1000,
3868 cpu_timeout);
3869
3870 if (err) {
3871 dev_err(hdev->dev,
3872 "Failed to communicate with Device CPU (CPU-CP timeout)\n");
3873 return -EIO;
3874 }
3875
3876 /* update FW application security bits */
3877 if (prop->fw_cpu_boot_dev_sts0_valid)
3878 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
3879 if (prop->fw_cpu_boot_dev_sts1_valid)
3880 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
3881
3882 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3883 return 0;
3884 }
3885
gaudi_pre_hw_init(struct hl_device * hdev)3886 static void gaudi_pre_hw_init(struct hl_device *hdev)
3887 {
3888 /* Perform read from the device to make sure device is up */
3889 RREG32(mmHW_STATE);
3890
3891 if (!hdev->asic_prop.fw_security_enabled) {
3892 /* Set the access through PCI bars (Linux driver only) as
3893 * secured
3894 */
3895 WREG32(mmPCIE_WRAP_LBW_PROT_OVR,
3896 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3897 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3898
3899 /* Perform read to flush the waiting writes to ensure
3900 * configuration was set in the device
3901 */
3902 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3903 }
3904
3905 /*
3906 * Let's mark in the H/W that we have reached this point. We check
3907 * this value in the reset_before_init function to understand whether
3908 * we need to reset the chip before doing H/W init. This register is
3909 * cleared by the H/W upon H/W reset
3910 */
3911 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3912 }
3913
gaudi_hw_init(struct hl_device * hdev)3914 static int gaudi_hw_init(struct hl_device *hdev)
3915 {
3916 struct gaudi_device *gaudi = hdev->asic_specific;
3917 int rc;
3918
3919 gaudi_pre_hw_init(hdev);
3920
3921 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
3922 * So we set it here and if anyone tries to move it later to
3923 * a different address, there will be an error
3924 */
3925 if (hdev->asic_prop.iatu_done_by_fw)
3926 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE;
3927
3928 /*
3929 * Before pushing u-boot/linux to device, need to set the hbm bar to
3930 * base address of dram
3931 */
3932 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3933 dev_err(hdev->dev,
3934 "failed to map HBM bar to DRAM base address\n");
3935 return -EIO;
3936 }
3937
3938 rc = gaudi_init_cpu(hdev);
3939 if (rc) {
3940 dev_err(hdev->dev, "failed to initialize CPU\n");
3941 return rc;
3942 }
3943
3944 /* In case the clock gating was enabled in preboot we need to disable
3945 * it here before touching the MME/TPC registers.
3946 */
3947 gaudi_disable_clock_gating(hdev);
3948
3949 /* SRAM scrambler must be initialized after CPU is running from HBM */
3950 gaudi_init_scrambler_sram(hdev);
3951
3952 /* This is here just in case we are working without CPU */
3953 gaudi_init_scrambler_hbm(hdev);
3954
3955 gaudi_init_golden_registers(hdev);
3956
3957 rc = gaudi_mmu_init(hdev);
3958 if (rc)
3959 return rc;
3960
3961 gaudi_init_security(hdev);
3962
3963 gaudi_init_pci_dma_qmans(hdev);
3964
3965 gaudi_init_hbm_dma_qmans(hdev);
3966
3967 gaudi_init_mme_qmans(hdev);
3968
3969 gaudi_init_tpc_qmans(hdev);
3970
3971 gaudi_init_nic_qmans(hdev);
3972
3973 gaudi_enable_timestamp(hdev);
3974
3975 /* MSI must be enabled before CPU queues and NIC are initialized */
3976 rc = gaudi_enable_msi(hdev);
3977 if (rc)
3978 goto disable_queues;
3979
3980 /* must be called after MSI was enabled */
3981 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3982 if (rc) {
3983 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3984 rc);
3985 goto disable_msi;
3986 }
3987
3988 /* Perform read from the device to flush all configuration */
3989 RREG32(mmHW_STATE);
3990
3991 return 0;
3992
3993 disable_msi:
3994 gaudi_disable_msi(hdev);
3995 disable_queues:
3996 gaudi_disable_mme_qmans(hdev);
3997 gaudi_disable_pci_dma_qmans(hdev);
3998
3999 return rc;
4000 }
4001
gaudi_hw_fini(struct hl_device * hdev,bool hard_reset,bool fw_reset)4002 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4003 {
4004 struct cpu_dyn_regs *dyn_regs =
4005 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4006 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset;
4007 struct gaudi_device *gaudi = hdev->asic_specific;
4008 bool driver_performs_reset;
4009
4010 if (!hard_reset) {
4011 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n");
4012 return 0;
4013 }
4014
4015 if (hdev->pldm) {
4016 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
4017 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
4018 } else {
4019 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
4020 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
4021 }
4022
4023 if (fw_reset) {
4024 dev_dbg(hdev->dev,
4025 "Firmware performs HARD reset, going to wait %dms\n",
4026 reset_timeout_ms);
4027
4028 goto skip_reset;
4029 }
4030
4031 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
4032 !hdev->asic_prop.hard_reset_done_by_fw);
4033
4034 /* Set device to handle FLR by H/W as we will put the device CPU to
4035 * halt mode
4036 */
4037 if (driver_performs_reset)
4038 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK |
4039 PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
4040
4041 /* If linux is loaded in the device CPU we need to communicate with it
4042 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU
4043 * registers in case of old F/Ws
4044 */
4045 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) {
4046 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4047 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4048 le32_to_cpu(dyn_regs->gic_host_halt_irq);
4049
4050 WREG32(irq_handler_offset,
4051 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id);
4052
4053 /* This is a hail-mary attempt to revive the card in the small chance that the
4054 * f/w has experienced a watchdog event, which caused it to return back to preboot.
4055 * In that case, triggering reset through GIC won't help. We need to trigger the
4056 * reset as if Linux wasn't loaded.
4057 *
4058 * We do it only if the reset cause was HB, because that would be the indication
4059 * of such an event.
4060 *
4061 * In case watchdog hasn't expired but we still got HB, then this won't do any
4062 * damage.
4063 */
4064 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) {
4065 if (hdev->asic_prop.hard_reset_done_by_fw)
4066 hl_fw_ask_hard_reset_without_linux(hdev);
4067 else
4068 hl_fw_ask_halt_machine_without_linux(hdev);
4069 }
4070 } else {
4071 if (hdev->asic_prop.hard_reset_done_by_fw)
4072 hl_fw_ask_hard_reset_without_linux(hdev);
4073 else
4074 hl_fw_ask_halt_machine_without_linux(hdev);
4075 }
4076
4077 if (driver_performs_reset) {
4078
4079 /* Configure the reset registers. Must be done as early as
4080 * possible in case we fail during H/W initialization
4081 */
4082 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
4083 (CFG_RST_H_DMA_MASK |
4084 CFG_RST_H_MME_MASK |
4085 CFG_RST_H_SM_MASK |
4086 CFG_RST_H_TPC_7_MASK));
4087
4088 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
4089
4090 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
4091 (CFG_RST_H_HBM_MASK |
4092 CFG_RST_H_TPC_7_MASK |
4093 CFG_RST_H_NIC_MASK |
4094 CFG_RST_H_SM_MASK |
4095 CFG_RST_H_DMA_MASK |
4096 CFG_RST_H_MME_MASK |
4097 CFG_RST_H_CPU_MASK |
4098 CFG_RST_H_MMU_MASK));
4099
4100 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
4101 (CFG_RST_L_IF_MASK |
4102 CFG_RST_L_PSOC_MASK |
4103 CFG_RST_L_TPC_MASK));
4104
4105 msleep(cpu_timeout_ms);
4106
4107 /* Tell ASIC not to re-initialize PCIe */
4108 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
4109
4110 /* Restart BTL/BLR upon hard-reset */
4111 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
4112
4113 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
4114 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
4115
4116 dev_dbg(hdev->dev,
4117 "Issued HARD reset command, going to wait %dms\n",
4118 reset_timeout_ms);
4119 } else {
4120 dev_dbg(hdev->dev,
4121 "Firmware performs HARD reset, going to wait %dms\n",
4122 reset_timeout_ms);
4123 }
4124
4125 skip_reset:
4126 /*
4127 * After hard reset, we can't poll the BTM_FSM register because the PSOC
4128 * itself is in reset. Need to wait until the reset is deasserted
4129 */
4130 msleep(reset_timeout_ms);
4131
4132 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
4133 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) {
4134 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status);
4135 return -ETIMEDOUT;
4136 }
4137
4138 if (gaudi) {
4139 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM |
4140 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK |
4141 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK |
4142 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER |
4143 HW_CAP_HBM_SCRAMBLER);
4144
4145 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
4146
4147 hdev->device_cpu_is_halted = false;
4148 }
4149 return 0;
4150 }
4151
gaudi_suspend(struct hl_device * hdev)4152 static int gaudi_suspend(struct hl_device *hdev)
4153 {
4154 return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
4155 }
4156
gaudi_resume(struct hl_device * hdev)4157 static int gaudi_resume(struct hl_device *hdev)
4158 {
4159 return gaudi_init_iatu(hdev);
4160 }
4161
gaudi_mmap(struct hl_device * hdev,struct vm_area_struct * vma,void * cpu_addr,dma_addr_t dma_addr,size_t size)4162 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
4163 void *cpu_addr, dma_addr_t dma_addr, size_t size)
4164 {
4165 int rc;
4166
4167 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
4168 VM_DONTCOPY | VM_NORESERVE);
4169
4170 #ifdef _HAS_DMA_MMAP_COHERENT
4171 /*
4172 * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP
4173 * so vm_insert_page() can handle it safely. Without this, the kernel
4174 * may BUG_ON due to VM_PFNMAP.
4175 */
4176 if (is_vmalloc_addr(cpu_addr))
4177 vm_flags_set(vma, VM_MIXEDMAP);
4178
4179 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr,
4180 (dma_addr - HOST_PHYS_BASE), size);
4181 if (rc)
4182 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
4183 #else
4184
4185 rc = remap_pfn_range(vma, vma->vm_start,
4186 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
4187 size, vma->vm_page_prot);
4188 if (rc)
4189 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
4190
4191 #endif
4192
4193
4194 return rc;
4195 }
4196
gaudi_ring_doorbell(struct hl_device * hdev,u32 hw_queue_id,u32 pi)4197 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
4198 {
4199 struct cpu_dyn_regs *dyn_regs =
4200 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4201 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset;
4202 struct gaudi_device *gaudi = hdev->asic_specific;
4203 bool invalid_queue = false;
4204 int dma_id;
4205
4206 switch (hw_queue_id) {
4207 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
4208 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
4209 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4210 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4211 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4212 break;
4213
4214 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
4215 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
4216 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4217 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
4218 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4219 break;
4220
4221 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
4222 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
4223 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4224 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4225 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4226 break;
4227
4228 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
4229 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
4230 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4231 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4232 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4233 break;
4234
4235 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
4236 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
4237 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4238 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4239 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4240 break;
4241
4242 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
4243 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
4244 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4245 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4246 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4247 break;
4248
4249 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
4250 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
4251 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4252 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4253 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4254 break;
4255
4256 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
4257 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6];
4258 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
4259 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
4260 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
4261 break;
4262
4263 case GAUDI_QUEUE_ID_CPU_PQ:
4264 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
4265 db_reg_offset = mmCPU_IF_PF_PQ_PI;
4266 else
4267 invalid_queue = true;
4268 break;
4269
4270 case GAUDI_QUEUE_ID_MME_0_0:
4271 db_reg_offset = mmMME2_QM_PQ_PI_0;
4272 break;
4273
4274 case GAUDI_QUEUE_ID_MME_0_1:
4275 db_reg_offset = mmMME2_QM_PQ_PI_1;
4276 break;
4277
4278 case GAUDI_QUEUE_ID_MME_0_2:
4279 db_reg_offset = mmMME2_QM_PQ_PI_2;
4280 break;
4281
4282 case GAUDI_QUEUE_ID_MME_0_3:
4283 db_reg_offset = mmMME2_QM_PQ_PI_3;
4284 break;
4285
4286 case GAUDI_QUEUE_ID_MME_1_0:
4287 db_reg_offset = mmMME0_QM_PQ_PI_0;
4288 break;
4289
4290 case GAUDI_QUEUE_ID_MME_1_1:
4291 db_reg_offset = mmMME0_QM_PQ_PI_1;
4292 break;
4293
4294 case GAUDI_QUEUE_ID_MME_1_2:
4295 db_reg_offset = mmMME0_QM_PQ_PI_2;
4296 break;
4297
4298 case GAUDI_QUEUE_ID_MME_1_3:
4299 db_reg_offset = mmMME0_QM_PQ_PI_3;
4300 break;
4301
4302 case GAUDI_QUEUE_ID_TPC_0_0:
4303 db_reg_offset = mmTPC0_QM_PQ_PI_0;
4304 break;
4305
4306 case GAUDI_QUEUE_ID_TPC_0_1:
4307 db_reg_offset = mmTPC0_QM_PQ_PI_1;
4308 break;
4309
4310 case GAUDI_QUEUE_ID_TPC_0_2:
4311 db_reg_offset = mmTPC0_QM_PQ_PI_2;
4312 break;
4313
4314 case GAUDI_QUEUE_ID_TPC_0_3:
4315 db_reg_offset = mmTPC0_QM_PQ_PI_3;
4316 break;
4317
4318 case GAUDI_QUEUE_ID_TPC_1_0:
4319 db_reg_offset = mmTPC1_QM_PQ_PI_0;
4320 break;
4321
4322 case GAUDI_QUEUE_ID_TPC_1_1:
4323 db_reg_offset = mmTPC1_QM_PQ_PI_1;
4324 break;
4325
4326 case GAUDI_QUEUE_ID_TPC_1_2:
4327 db_reg_offset = mmTPC1_QM_PQ_PI_2;
4328 break;
4329
4330 case GAUDI_QUEUE_ID_TPC_1_3:
4331 db_reg_offset = mmTPC1_QM_PQ_PI_3;
4332 break;
4333
4334 case GAUDI_QUEUE_ID_TPC_2_0:
4335 db_reg_offset = mmTPC2_QM_PQ_PI_0;
4336 break;
4337
4338 case GAUDI_QUEUE_ID_TPC_2_1:
4339 db_reg_offset = mmTPC2_QM_PQ_PI_1;
4340 break;
4341
4342 case GAUDI_QUEUE_ID_TPC_2_2:
4343 db_reg_offset = mmTPC2_QM_PQ_PI_2;
4344 break;
4345
4346 case GAUDI_QUEUE_ID_TPC_2_3:
4347 db_reg_offset = mmTPC2_QM_PQ_PI_3;
4348 break;
4349
4350 case GAUDI_QUEUE_ID_TPC_3_0:
4351 db_reg_offset = mmTPC3_QM_PQ_PI_0;
4352 break;
4353
4354 case GAUDI_QUEUE_ID_TPC_3_1:
4355 db_reg_offset = mmTPC3_QM_PQ_PI_1;
4356 break;
4357
4358 case GAUDI_QUEUE_ID_TPC_3_2:
4359 db_reg_offset = mmTPC3_QM_PQ_PI_2;
4360 break;
4361
4362 case GAUDI_QUEUE_ID_TPC_3_3:
4363 db_reg_offset = mmTPC3_QM_PQ_PI_3;
4364 break;
4365
4366 case GAUDI_QUEUE_ID_TPC_4_0:
4367 db_reg_offset = mmTPC4_QM_PQ_PI_0;
4368 break;
4369
4370 case GAUDI_QUEUE_ID_TPC_4_1:
4371 db_reg_offset = mmTPC4_QM_PQ_PI_1;
4372 break;
4373
4374 case GAUDI_QUEUE_ID_TPC_4_2:
4375 db_reg_offset = mmTPC4_QM_PQ_PI_2;
4376 break;
4377
4378 case GAUDI_QUEUE_ID_TPC_4_3:
4379 db_reg_offset = mmTPC4_QM_PQ_PI_3;
4380 break;
4381
4382 case GAUDI_QUEUE_ID_TPC_5_0:
4383 db_reg_offset = mmTPC5_QM_PQ_PI_0;
4384 break;
4385
4386 case GAUDI_QUEUE_ID_TPC_5_1:
4387 db_reg_offset = mmTPC5_QM_PQ_PI_1;
4388 break;
4389
4390 case GAUDI_QUEUE_ID_TPC_5_2:
4391 db_reg_offset = mmTPC5_QM_PQ_PI_2;
4392 break;
4393
4394 case GAUDI_QUEUE_ID_TPC_5_3:
4395 db_reg_offset = mmTPC5_QM_PQ_PI_3;
4396 break;
4397
4398 case GAUDI_QUEUE_ID_TPC_6_0:
4399 db_reg_offset = mmTPC6_QM_PQ_PI_0;
4400 break;
4401
4402 case GAUDI_QUEUE_ID_TPC_6_1:
4403 db_reg_offset = mmTPC6_QM_PQ_PI_1;
4404 break;
4405
4406 case GAUDI_QUEUE_ID_TPC_6_2:
4407 db_reg_offset = mmTPC6_QM_PQ_PI_2;
4408 break;
4409
4410 case GAUDI_QUEUE_ID_TPC_6_3:
4411 db_reg_offset = mmTPC6_QM_PQ_PI_3;
4412 break;
4413
4414 case GAUDI_QUEUE_ID_TPC_7_0:
4415 db_reg_offset = mmTPC7_QM_PQ_PI_0;
4416 break;
4417
4418 case GAUDI_QUEUE_ID_TPC_7_1:
4419 db_reg_offset = mmTPC7_QM_PQ_PI_1;
4420 break;
4421
4422 case GAUDI_QUEUE_ID_TPC_7_2:
4423 db_reg_offset = mmTPC7_QM_PQ_PI_2;
4424 break;
4425
4426 case GAUDI_QUEUE_ID_TPC_7_3:
4427 db_reg_offset = mmTPC7_QM_PQ_PI_3;
4428 break;
4429
4430 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3:
4431 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0))
4432 invalid_queue = true;
4433
4434 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4435 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off;
4436 break;
4437
4438 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3:
4439 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1))
4440 invalid_queue = true;
4441
4442 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4443 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off;
4444 break;
4445
4446 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3:
4447 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2))
4448 invalid_queue = true;
4449
4450 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4451 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off;
4452 break;
4453
4454 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3:
4455 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3))
4456 invalid_queue = true;
4457
4458 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4459 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off;
4460 break;
4461
4462 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3:
4463 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4))
4464 invalid_queue = true;
4465
4466 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4467 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off;
4468 break;
4469
4470 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3:
4471 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5))
4472 invalid_queue = true;
4473
4474 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4475 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off;
4476 break;
4477
4478 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3:
4479 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6))
4480 invalid_queue = true;
4481
4482 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4483 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off;
4484 break;
4485
4486 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3:
4487 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7))
4488 invalid_queue = true;
4489
4490 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4491 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off;
4492 break;
4493
4494 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3:
4495 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8))
4496 invalid_queue = true;
4497
4498 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4499 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off;
4500 break;
4501
4502 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3:
4503 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9))
4504 invalid_queue = true;
4505
4506 q_off = ((hw_queue_id - 1) & 0x3) * 4;
4507 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off;
4508 break;
4509
4510 default:
4511 invalid_queue = true;
4512 }
4513
4514 if (invalid_queue) {
4515 /* Should never get here */
4516 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
4517 hw_queue_id);
4518 return;
4519 }
4520
4521 db_value = pi;
4522
4523 /* ring the doorbell */
4524 WREG32(db_reg_offset, db_value);
4525
4526 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
4527 /* make sure device CPU will read latest data from host */
4528 mb();
4529
4530 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
4531 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
4532 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq);
4533
4534 WREG32(irq_handler_offset,
4535 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id);
4536 }
4537 }
4538
/*
 * gaudi_pqe_write() - copy a buffer descriptor into a PQ entry.
 * @hdev: habanalabs device structure (unused here).
 * @pqe: destination PQ entry (two little-endian 64-bit words).
 * @bd: source buffer descriptor.
 */
static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
				struct hl_bd *bd)
{
	__le64 *src = (__le64 *) bd;
	int i;

	/* The QMANs are on the host memory so a simple copy suffice */
	for (i = 0 ; i < 2 ; i++)
		pqe[i] = src[i];
}
4548
/*
 * gaudi_dma_alloc_coherent() - allocate coherent DMA memory and translate
 * the handle to the device's view of host memory.
 * @hdev: habanalabs device structure.
 * @size: allocation size in bytes.
 * @dma_handle: filled with the device-visible DMA address.
 * @flags: GFP allocation flags.
 *
 * Return: kernel VA of the allocation, or NULL on failure.
 */
static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
			dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;

	vaddr = dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
	if (!vaddr)
		return NULL;

	/* Shift to the device's base physical address of host memory */
	*dma_handle += HOST_PHYS_BASE;

	return vaddr;
}
4561
/*
 * gaudi_dma_free_coherent() - free memory from gaudi_dma_alloc_coherent().
 * @hdev: habanalabs device structure.
 * @size: size that was allocated.
 * @cpu_addr: kernel VA returned by the allocation.
 * @dma_handle: device-visible DMA address returned by the allocation.
 */
static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
				void *cpu_addr, dma_addr_t dma_handle)
{
	/* Cancel the device's base physical address of host memory */
	dma_free_coherent(&hdev->pdev->dev, size, cpu_addr,
				dma_handle - HOST_PHYS_BASE);
}
4570
/*
 * gaudi_scrub_device_dram() - fill the entire user DRAM (HBM) with a value.
 * @hdev: habanalabs device structure.
 * @val: 64-bit pattern to write.
 *
 * Splits the DRAM user range into chunks of up to 2GB and programs each DMA
 * core in turn to memset one chunk, so all channels work in parallel. After
 * each round of submissions it polls every channel until it goes idle.
 *
 * Return: 0 on success, -EIO if a DMA channel did not become idle in time.
 */
static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 cur_addr = prop->dram_user_base_address;
	u32 chunk_size, busy;
	int rc, dma_id;

	while (cur_addr < prop->dram_end_address) {
		/* Hand one chunk to each DMA channel */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			chunk_size =
			min((u64)SZ_2G, prop->dram_end_address - cur_addr);

			dev_dbg(hdev->dev,
				"Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
				cur_addr, cur_addr + chunk_size);

			/*
			 * Memset descriptor: SRC registers hold the pattern,
			 * DST registers the DRAM chunk. The LIN + MEM_SET
			 * commit bits kick off the fill.
			 */
			WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset,
					lower_32_bits(val));
			WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset,
					upper_32_bits(val));
			WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
					lower_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
					upper_32_bits(cur_addr));
			WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset,
					chunk_size);
			WREG32(mmDMA0_CORE_COMMIT + dma_offset,
					((1 << DMA0_CORE_COMMIT_LIN_SHIFT) |
					(1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT)));

			cur_addr += chunk_size;

			/* Last chunk may land before all channels were used */
			if (cur_addr == prop->dram_end_address)
				break;
		}

		/* Wait for all channels to drain before the next round */
		for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) {
			u32 dma_offset = dma_id * DMA_CORE_OFFSET;

			rc = hl_poll_timeout(
				hdev,
				mmDMA0_CORE_STS0 + dma_offset,
				busy,
				((busy & DMA0_CORE_STS0_BUSY_MASK) == 0),
				1000,
				HBM_SCRUBBING_TIMEOUT_US);

			if (rc) {
				dev_err(hdev->dev,
					"DMA Timeout during HBM scrubbing of DMA #%d\n",
					dma_id);
				return -EIO;
			}
		}
	}

	return 0;
}
4631
/*
 * gaudi_scrub_device_mem() - scrub SRAM and HBM with the configured value.
 * @hdev: habanalabs device structure.
 *
 * No-op unless hdev->memory_scrub is set. Waits (bounded by the HBM
 * scrubbing timeout) for the device to become idle, then memsets the SRAM
 * user range and scrubs the whole user HBM.
 *
 * Return: 0 on success, -ETIMEDOUT if the device did not go idle, or the
 * error of the failing memset/scrub step.
 */
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
	u64 addr, size, val = hdev->memory_scrub_val;
	ktime_t timeout;
	int rc = 0;

	if (!hdev->memory_scrub)
		return 0;

	/* Scrubbing uses the DMA engines, so the device must be idle first */
	timeout = ktime_add_us(ktime_get(), wait_to_idle_time);
	while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_err(hdev->dev, "waiting for idle timeout\n");
			return -ETIMEDOUT;
		}
		usleep_range((1000 >> 2) + 1, 1000);
	}

	/* Scrub SRAM */
	addr = prop->sram_user_base_address;
	/* On simulation (pldm) scrub only a small window to keep runtime sane */
	size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET;

	dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n",
			addr, addr + size, val);
	rc = gaudi_memset_device_memory(hdev, addr, size, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc);
		return rc;
	}

	/* Scrub HBM using all DMA channels in parallel */
	rc = gaudi_scrub_device_dram(hdev, val);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc);
		return rc;
	}

	return 0;
}
4673
/*
 * gaudi_get_int_queue_base() - get the PQ details of an internal queue.
 * @hdev: habanalabs device structure.
 * @queue_id: index of the queue; must refer to an internal (INT) queue.
 * @dma_handle: filled with the PQ DMA address.
 * @queue_len: filled with the PQ length in entries.
 *
 * Return: kernel VA of the queue's PQ, or NULL for an invalid queue id.
 */
static void *gaudi_get_int_queue_base(struct hl_device *hdev,
				u32 queue_id, dma_addr_t *dma_handle,
				u16 *queue_len)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_internal_qman_info *qman_info;
	bool valid;

	valid = (queue_id < GAUDI_QUEUE_ID_SIZE) &&
			(gaudi_queue_type[queue_id] == QUEUE_TYPE_INT);
	if (!valid) {
		dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
		return NULL;
	}

	qman_info = &gaudi->internal_qmans[queue_id];
	*dma_handle = qman_info->pq_dma_addr;
	*queue_len = qman_info->pq_size / QMAN_PQ_ENTRY_SIZE;

	return qman_info->pq_kernel_addr;
}
4693
/*
 * gaudi_send_cpu_message() - send a message to the device CPU over its PQ.
 * @hdev: habanalabs device structure.
 * @msg: message buffer.
 * @len: message length in bytes.
 * @timeout: timeout in usec; 0 selects the default timeout.
 * @result: optional; receives the F/W result (0 if the CPU queue is down).
 *
 * Return: 0 on success (including the no-CPU-queue case) or an error code
 * from the F/W transport.
 */
static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
				u16 len, u32 timeout, u64 *result)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 tmo = timeout ? timeout : GAUDI_MSG_TO_CPU_TIMEOUT_USEC;

	/* Without a CPU queue, silently succeed with a zeroed result */
	if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
		if (result)
			*result = 0;
		return 0;
	}

	return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
					tmo, result);
}
4711
/*
 * gaudi_test_queue() - sanity test a single external H/W queue.
 * @hdev: habanalabs device structure.
 * @hw_queue_id: index of the queue to test.
 *
 * Sends a MSG_PROT fence packet through the queue and polls a host DMA
 * buffer until the engine writes the expected fence value to it.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EIO on timeout, or
 * the error of the submission.
 */
static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
{
	struct packet_msg_prot *fence_pkt;
	dma_addr_t pkt_dma_addr;
	u32 fence_val, tmp, timeout_usec;
	dma_addr_t fence_dma_addr;
	u32 *fence_ptr;
	int rc;

	/* Simulation platforms are much slower - use a longer timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
	else
		timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;

	fence_val = GAUDI_QMAN0_FENCE_VAL;

	/* Scratch word in host memory the fence packet will write to */
	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate memory for H/W queue %d testing\n",
			hw_queue_id);
		return -ENOMEM;
	}

	*fence_ptr = 0;

	fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL,
						&pkt_dma_addr);
	if (!fence_pkt) {
		dev_err(hdev->dev,
			"Failed to allocate packet for H/W queue %d testing\n",
			hw_queue_id);
		rc = -ENOMEM;
		goto free_fence_ptr;
	}

	/* MSG_PROT with engine- and message-barrier bits set */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(fence_val);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
					sizeof(struct packet_msg_prot),
					pkt_dma_addr);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to send fence packet to H/W queue %d\n",
			hw_queue_id);
		goto free_pkt;
	}

	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
					1000, timeout_usec, true);

	/* Advance the kernel CI regardless of the test outcome */
	hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev,
			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
		rc = -EIO;
	}

free_pkt:
	hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr);
free_fence_ptr:
	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
4784
gaudi_test_cpu_queue(struct hl_device * hdev)4785 static int gaudi_test_cpu_queue(struct hl_device *hdev)
4786 {
4787 struct gaudi_device *gaudi = hdev->asic_specific;
4788
4789 /*
4790 * check capability here as send_cpu_message() won't update the result
4791 * value if no capability
4792 */
4793 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
4794 return 0;
4795
4796 return hl_fw_test_cpu_queue(hdev);
4797 }
4798
gaudi_test_queues(struct hl_device * hdev)4799 static int gaudi_test_queues(struct hl_device *hdev)
4800 {
4801 int i, rc, ret_val = 0;
4802
4803 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) {
4804 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
4805 rc = gaudi_test_queue(hdev, i);
4806 if (rc)
4807 ret_val = -EINVAL;
4808 }
4809 }
4810
4811 rc = gaudi_test_cpu_queue(hdev);
4812 if (rc)
4813 ret_val = -EINVAL;
4814
4815 return ret_val;
4816 }
4817
/*
 * gaudi_dma_pool_zalloc() - allocate a zeroed block from the device DMA pool
 * and translate the handle to the device's view of host memory.
 * @hdev: habanalabs device structure.
 * @size: requested size; must not exceed the pool block size.
 * @mem_flags: GFP allocation flags.
 * @dma_handle: filled with the device-visible DMA address.
 *
 * Return: kernel VA of the block, or NULL if size is too big or the pool is
 * exhausted.
 */
static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
			gfp_t mem_flags, dma_addr_t *dma_handle)
{
	void *vaddr;

	if (size > GAUDI_DMA_POOL_BLK_SIZE)
		return NULL;

	vaddr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
	if (!vaddr)
		return NULL;

	/* Shift to the device's base physical address of host memory */
	*dma_handle += HOST_PHYS_BASE;

	return vaddr;
}
4834
/*
 * gaudi_dma_pool_free() - return a block to the device DMA pool.
 * @hdev: habanalabs device structure.
 * @vaddr: kernel VA returned by gaudi_dma_pool_zalloc().
 * @dma_addr: device-visible DMA address returned by the allocation.
 */
static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
			dma_addr_t dma_addr)
{
	/* Cancel the device's base physical address of host memory */
	dma_pool_free(hdev->dma_pool, vaddr, dma_addr - HOST_PHYS_BASE);
}
4843
/* Thin ASIC-ops wrapper over the common CPU-accessible DMA pool allocator */
static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
					size_t size, dma_addr_t *dma_handle)
{
	return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
4849
/* Thin ASIC-ops wrapper over the common CPU-accessible DMA pool free */
static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
					size_t size, void *vaddr)
{
	hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
4855
/*
 * gaudi_get_dma_desc_list_size() - size of the LIN_DMA packets needed to
 * transfer a DMA-mapped scatter-gather table.
 * @hdev: habanalabs device structure.
 * @sgt: DMA-mapped scatter-gather table.
 *
 * Counts descriptors after merging physically contiguous entries, as long
 * as a merged run does not exceed DMA_MAX_TRANSFER_SIZE. The merge logic
 * must mirror gaudi_patch_dma_packet() exactly, since this count sizes the
 * patched CB that function fills.
 *
 * Return: total size in bytes of the required packet_lin_dma packets.
 */
static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt)
{
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt;
	u64 len, len_next;
	dma_addr_t addr, addr_next;

	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Fold subsequent entries that physically continue this one */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((addr + len == addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		dma_desc_cnt++;
	}

	return dma_desc_cnt * sizeof(struct packet_lin_dma);
}
4895
/*
 * gaudi_pin_memory_before_cs() - pin the host buffer a user LIN_DMA packet
 * references so it can be DMA-mapped for the command submission.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; its userptr list and patched CB size are updated.
 * @user_dma_pkt: the user's LIN_DMA packet (provides the transfer size).
 * @addr: host virtual address referenced by the packet.
 * @dir: DMA direction for the mapping.
 *
 * If the region was already pinned by an earlier packet of this job, only
 * the patched-CB size accounting is done. Otherwise the memory is pinned,
 * added to the job's userptr list and DMA-mapped.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, or the error of the
 * pinning/mapping step.
 */
static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				u64 addr, enum dma_data_direction dir)
{
	struct hl_userptr *userptr;
	int rc;

	if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
			parser->job_userptr_list, &userptr))
		goto already_pinned;

	userptr = kzalloc_obj(*userptr);
	if (!userptr)
		return -ENOMEM;

	rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
				userptr);
	if (rc)
		goto free_userptr;

	/* Add to the list before mapping, so cleanup paths can find it */
	list_add_tail(&userptr->job_node, parser->job_userptr_list);

	rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto unpin_memory;
	}

	userptr->dma_mapped = true;
	userptr->dir = dir;

already_pinned:
	/* Account for the LIN_DMA descriptors this buffer expands into */
	parser->patched_cb_size +=
			gaudi_get_dma_desc_list_size(hdev, userptr->sgt);

	return 0;

unpin_memory:
	list_del(&userptr->job_node);
	hl_unpin_host_memory(hdev, userptr);
free_userptr:
	kfree(userptr);
	return rc;
}
4941
/*
 * gaudi_validate_dma_pkt_host() - validate a LIN_DMA packet that touches
 * host memory and pin the referenced buffer when needed.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 * @user_dma_pkt: the user's LIN_DMA packet.
 * @src_in_host: true for a host-to-device transfer.
 *
 * A memset from host carries its value inside the packet, so there is no
 * host buffer to pin in that case.
 *
 * Return: 0 on success or the error of the pinning step.
 */
static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				bool src_in_host)
{
	bool user_memset, skip_host_mem_pin = false;
	enum dma_data_direction dir;
	u64 addr;

	user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		if (user_memset)
			skip_host_mem_pin = true;

		dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
		dir = DMA_TO_DEVICE;
		addr = le64_to_cpu(user_dma_pkt->src_addr);
	} else {
		dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
		dir = DMA_FROM_DEVICE;
		addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
				GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
				GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
	}

	if (skip_host_mem_pin) {
		parser->patched_cb_size += sizeof(*user_dma_pkt);
		return 0;
	}

	return gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
						addr, dir);
}
4979
gaudi_validate_dma_pkt_no_mmu(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_lin_dma * user_dma_pkt)4980 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4981 struct hl_cs_parser *parser,
4982 struct packet_lin_dma *user_dma_pkt)
4983 {
4984 bool src_in_host = false;
4985 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4986 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4987 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4988
4989 dev_dbg(hdev->dev, "DMA packet details:\n");
4990 dev_dbg(hdev->dev, "source == 0x%llx\n",
4991 le64_to_cpu(user_dma_pkt->src_addr));
4992 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4993 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4994
4995 /*
4996 * Special handling for DMA with size 0. Bypass all validations
4997 * because no transactions will be done except for WR_COMP, which
4998 * is not a security issue
4999 */
5000 if (!le32_to_cpu(user_dma_pkt->tsize)) {
5001 parser->patched_cb_size += sizeof(*user_dma_pkt);
5002 return 0;
5003 }
5004
5005 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
5006 src_in_host = true;
5007
5008 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
5009 src_in_host);
5010 }
5011
gaudi_validate_load_and_exe_pkt(struct hl_device * hdev,struct hl_cs_parser * parser,struct packet_load_and_exe * user_pkt)5012 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
5013 struct hl_cs_parser *parser,
5014 struct packet_load_and_exe *user_pkt)
5015 {
5016 u32 cfg;
5017
5018 cfg = le32_to_cpu(user_pkt->cfg);
5019
5020 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
5021 dev_err(hdev->dev,
5022 "User not allowed to use Load and Execute\n");
5023 return -EPERM;
5024 }
5025
5026 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
5027
5028 return 0;
5029 }
5030
/*
 * gaudi_validate_cb() - walk a user command buffer, reject privileged
 * packets and compute the size of the patched CB.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; patched_cb_size is (re)computed here.
 * @is_mmu: true when the MMU is enabled (LIN_DMA packets are then copied
 *          as-is instead of being expanded per SG entry).
 *
 * Return: 0 on success, -EPERM for a forbidden packet, -EINVAL for a
 * malformed CB.
 */
static int gaudi_validate_cb(struct hl_device *hdev,
			struct hl_cs_parser *parser, bool is_mmu)
{
	u32 cb_parsed_length = 0;
	int rc = 0;

	parser->patched_cb_size = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		struct gaudi_packet *user_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		/* MSG_PROT/CP_DMA/STOP/WREG_BULK are privileged packets the
		 * driver itself uses; user CBs must not contain them.
		 */
		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_BULK:
			dev_err(hdev->dev,
				"User not allowed to use WREG_BULK\n");
			rc = -EPERM;
			break;

		case PACKET_LOAD_AND_EXE:
			rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
				(struct packet_load_and_exe *) user_pkt);
			break;

		case PACKET_LIN_DMA:
			parser->contains_dma_pkt = true;
			if (is_mmu)
				parser->patched_cb_size += pkt_size;
			else
				rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
					(struct packet_lin_dma *) user_pkt);
			break;

		case PACKET_WREG_32:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
			/* Allowed packets pass through unchanged */
			parser->patched_cb_size += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	/*
	 * The new CB should have space at the end for:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size += gaudi_get_patched_cb_extra_size(
			parser->patched_cb_size);

	return rc;
}
5137
/*
 * gaudi_patch_dma_packet() - expand a user LIN_DMA packet into one packet
 * per (merged) scatter-gather entry of the pinned host buffer.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 * @user_dma_pkt: the user's original LIN_DMA packet.
 * @new_dma_pkt: destination in the patched CB for the expanded packets.
 * @new_dma_pkt_size: filled with the total size of the emitted packets.
 *
 * The SG merge logic must mirror gaudi_get_dma_desc_list_size(), which
 * sized the patched CB. The WR_COMP enable bit is kept only on the last
 * emitted packet, and the engine-barrier bit only on the first.
 *
 * Return: 0 on success, -EFAULT if the buffer was not pinned or the SG
 * table is empty.
 */
static int gaudi_patch_dma_packet(struct hl_device *hdev,
				struct hl_cs_parser *parser,
				struct packet_lin_dma *user_dma_pkt,
				struct packet_lin_dma *new_dma_pkt,
				u32 *new_dma_pkt_size)
{
	struct hl_userptr *userptr;
	struct scatterlist *sg, *sg_next_iter;
	u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
	u64 len, len_next;
	dma_addr_t dma_addr, dma_addr_next;
	u64 device_memory_addr, addr;
	enum dma_data_direction dir;
	struct sg_table *sgt;
	bool src_in_host = false;
	bool skip_host_mem_pin = false;
	bool user_memset;

	ctl = le32_to_cpu(user_dma_pkt->ctl);

	/* Only the PCI DMA queues (0-3) move data from host memory */
	if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
		src_in_host = true;

	user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
			GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;

	if (src_in_host) {
		addr = le64_to_cpu(user_dma_pkt->src_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
		dir = DMA_TO_DEVICE;
		/* memset carries its value in the packet - nothing pinned */
		if (user_memset)
			skip_host_mem_pin = true;
	} else {
		addr = le64_to_cpu(user_dma_pkt->dst_addr);
		device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
		dir = DMA_FROM_DEVICE;
	}

	if ((!skip_host_mem_pin) &&
		(!hl_userptr_is_pinned(hdev, addr,
					le32_to_cpu(user_dma_pkt->tsize),
					parser->job_userptr_list, &userptr))) {
		dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
				addr, user_dma_pkt->tsize);
		return -EFAULT;
	}

	/* A host-side memset needs no expansion - copy the packet verbatim */
	if ((user_memset) && (dir == DMA_TO_DEVICE)) {
		memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
		*new_dma_pkt_size = sizeof(*user_dma_pkt);
		return 0;
	}

	/* Remember the user's WR_COMP setting to restore on the last packet */
	user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;

	sgt = userptr->sgt;
	dma_desc_cnt = 0;

	for_each_sgtable_dma_sg(sgt, sg, count) {
		len = sg_dma_len(sg);
		dma_addr = sg_dma_address(sg);

		if (len == 0)
			break;

		/* Fold subsequent entries that physically continue this one */
		while ((count + 1) < sgt->nents) {
			sg_next_iter = sg_next(sg);
			len_next = sg_dma_len(sg_next_iter);
			dma_addr_next = sg_dma_address(sg_next_iter);

			if (len_next == 0)
				break;

			if ((dma_addr + len == dma_addr_next) &&
				(len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
				len += len_next;
				count++;
				sg = sg_next_iter;
			} else {
				break;
			}
		}

		ctl = le32_to_cpu(user_dma_pkt->ctl);
		/* Engine barrier is needed only on the first packet */
		if (likely(dma_desc_cnt))
			ctl &= ~GAUDI_PKT_CTL_EB_MASK;
		ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
		new_dma_pkt->ctl = cpu_to_le32(ctl);
		new_dma_pkt->tsize = cpu_to_le32(len);

		if (dir == DMA_TO_DEVICE) {
			new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
		} else {
			new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
			new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
		}

		if (!user_memset)
			device_memory_addr += len;
		dma_desc_cnt++;
		new_dma_pkt++;
	}

	if (!dma_desc_cnt) {
		dev_err(hdev->dev,
			"Error of 0 SG entries when patching DMA packet\n");
		return -EFAULT;
	}

	/* Fix the last dma packet - wrcomp must be as user set it */
	new_dma_pkt--;
	new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);

	*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);

	return 0;
}
5256
/*
 * gaudi_patch_cb() - copy a validated user CB into the patched CB,
 * expanding LIN_DMA packets along the way.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state; user_cb and patched_cb must be set.
 *
 * Must agree with gaudi_validate_cb(): the same packets are allowed and
 * they expand to the same sizes, so the pre-sized patched CB fits exactly.
 *
 * Return: 0 on success, -EPERM for a forbidden packet, -EINVAL/-EFAULT for
 * a malformed CB.
 */
static int gaudi_patch_cb(struct hl_device *hdev,
				struct hl_cs_parser *parser)
{
	u32 cb_parsed_length = 0;
	u32 cb_patched_cur_length = 0;
	int rc = 0;

	/* cb_user_size is more than 0 so loop will always be executed */
	while (cb_parsed_length < parser->user_cb_size) {
		enum packet_id pkt_id;
		u16 pkt_size;
		u32 new_pkt_size = 0;
		struct gaudi_packet *user_pkt, *kernel_pkt;

		user_pkt = parser->user_cb->kernel_address + cb_parsed_length;
		kernel_pkt = parser->patched_cb->kernel_address +
					cb_patched_cur_length;

		pkt_id = (enum packet_id) (
				(le64_to_cpu(user_pkt->header) &
				PACKET_HEADER_PACKET_ID_MASK) >>
					PACKET_HEADER_PACKET_ID_SHIFT);

		if (!validate_packet_id(pkt_id)) {
			dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		pkt_size = gaudi_packet_sizes[pkt_id];
		cb_parsed_length += pkt_size;
		if (cb_parsed_length > parser->user_cb_size) {
			dev_err(hdev->dev,
				"packet 0x%x is out of CB boundary\n", pkt_id);
			rc = -EINVAL;
			break;
		}

		switch (pkt_id) {
		case PACKET_LIN_DMA:
			/* May expand into several packets - advances by the
			 * actual emitted size, not the user packet size.
			 */
			rc = gaudi_patch_dma_packet(hdev, parser,
					(struct packet_lin_dma *) user_pkt,
					(struct packet_lin_dma *) kernel_pkt,
					&new_pkt_size);
			cb_patched_cur_length += new_pkt_size;
			break;

		case PACKET_MSG_PROT:
			dev_err(hdev->dev,
				"User not allowed to use MSG_PROT\n");
			rc = -EPERM;
			break;

		case PACKET_CP_DMA:
			dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
			rc = -EPERM;
			break;

		case PACKET_STOP:
			dev_err(hdev->dev, "User not allowed to use STOP\n");
			rc = -EPERM;
			break;

		case PACKET_WREG_32:
		case PACKET_WREG_BULK:
		case PACKET_MSG_LONG:
		case PACKET_MSG_SHORT:
		case PACKET_REPEAT:
		case PACKET_FENCE:
		case PACKET_NOP:
		case PACKET_ARB_POINT:
		case PACKET_LOAD_AND_EXE:
			/* Allowed packets are copied verbatim */
			memcpy(kernel_pkt, user_pkt, pkt_size);
			cb_patched_cur_length += pkt_size;
			break;

		default:
			dev_err(hdev->dev, "Invalid packet header 0x%x\n",
				pkt_id);
			rc = -EINVAL;
			break;
		}

		if (rc)
			break;
	}

	return rc;
}
5346
/*
 * gaudi_parse_cb_mmu() - parse a user CB for an external queue when the MMU
 * is enabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 *
 * With the MMU on, the user CB is copied as-is into a driver-owned patched
 * CB (plus room for the trailing completion packets) and then validated in
 * place - no per-SG DMA expansion is needed.
 *
 * Return: 0 on success or a negative error code.
 */
static int gaudi_parse_cb_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	u32 patched_cb_size;
	struct hl_cb *user_cb;
	int rc;

	/*
	 * The new CB should have space at the end for:
	 * 1. Optional NOP padding for cacheline alignment
	 * 2. A MSG_PROT packet that will act as a completion packet
	 * 3. A MSG_PROT packet that will generate MSI interrupt
	 */
	if (parser->completion)
		parser->patched_cb_size = parser->user_cb_size +
				gaudi_get_patched_cb_extra_size(parser->user_cb_size);
	else
		parser->patched_cb_size = parser->user_cb_size;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n",
			rc);
		return rc;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	/*
	 * We are protected from overflow because the check
	 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk()
	 * in the common code. That check is done only if is_kernel_allocated_cb is true.
	 *
	 * There is no option to reach here without going through that check because:
	 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to
	 *    an external queue.
	 * 2. For Gaudi, we only parse CBs that were submitted to the external queues.
	 */
	memcpy(parser->patched_cb->kernel_address,
		parser->user_cb->kernel_address,
		parser->user_cb_size);

	patched_cb_size = parser->patched_cb_size;

	/* Validate patched CB instead of user CB */
	user_cb = parser->user_cb;
	parser->user_cb = parser->patched_cb;
	rc = gaudi_validate_cb(hdev, parser, true);
	parser->user_cb = user_cb;

	if (rc) {
		hl_cb_put(parser->patched_cb);
		goto out;
	}

	/* Validation must recompute the exact same size we allocated */
	if (patched_cb_size != parser->patched_cb_size) {
		dev_err(hdev->dev, "user CB size mismatch\n");
		hl_cb_put(parser->patched_cb);
		rc = -EINVAL;
		goto out;
	}

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

	return rc;
}
5431
/*
 * gaudi_parse_cb_no_mmu() - parse a user CB for an external queue when the
 * MMU is disabled.
 * @hdev: habanalabs device structure.
 * @parser: CS parser state.
 *
 * Validates the user CB (which also computes the patched size, pinning any
 * host buffers referenced by LIN_DMA packets), allocates the patched CB and
 * fills it via gaudi_patch_cb(). On any failure the job's pinned userptr
 * list is torn down.
 *
 * Return: 0 on success or a negative error code.
 */
static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
		struct hl_cs_parser *parser)
{
	u64 handle;
	int rc;

	rc = gaudi_validate_cb(hdev, parser, false);

	if (rc)
		goto free_userptr;

	rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx,
				parser->patched_cb_size, false, false,
				&handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate patched CB for DMA CS %d\n", rc);
		goto free_userptr;
	}

	parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle);
	/* hl_cb_get should never fail here */
	if (!parser->patched_cb) {
		dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle);
		rc = -EFAULT;
		goto out;
	}

	rc = gaudi_patch_cb(hdev, parser);

	if (rc)
		hl_cb_put(parser->patched_cb);

out:
	/*
	 * Always call cb destroy here because we still have 1 reference
	 * to it by calling cb_get earlier. After the job will be completed,
	 * cb_put will release it, but here we want to remove it from the
	 * idr
	 */
	hl_cb_destroy(&hdev->kernel_mem_mgr, handle);

free_userptr:
	if (rc)
		hl_userptr_delete_list(hdev, parser->job_userptr_list);
	return rc;
}
5479
gaudi_parse_cb_no_ext_queue(struct hl_device * hdev,struct hl_cs_parser * parser)5480 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
5481 struct hl_cs_parser *parser)
5482 {
5483 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
5484 struct gaudi_device *gaudi = hdev->asic_specific;
5485 u32 nic_queue_offset, nic_mask_q_id;
5486
5487 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) &&
5488 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) {
5489 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0;
5490 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2));
5491
5492 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) {
5493 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
5494 return -EINVAL;
5495 }
5496 }
5497
5498 /* For internal queue jobs just check if CB address is valid */
5499 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5500 parser->user_cb_size,
5501 asic_prop->sram_user_base_address,
5502 asic_prop->sram_end_address))
5503 return 0;
5504
5505 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5506 parser->user_cb_size,
5507 asic_prop->dram_user_base_address,
5508 asic_prop->dram_end_address))
5509 return 0;
5510
5511 /* PMMU and HPMMU addresses are equal, check only one of them */
5512 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
5513 parser->user_cb_size,
5514 asic_prop->pmmu.start_addr,
5515 asic_prop->pmmu.end_addr))
5516 return 0;
5517
5518 dev_err(hdev->dev,
5519 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
5520 parser->user_cb, parser->user_cb_size);
5521
5522 return -EFAULT;
5523 }
5524
gaudi_cs_parser(struct hl_device * hdev,struct hl_cs_parser * parser)5525 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
5526 {
5527 struct gaudi_device *gaudi = hdev->asic_specific;
5528
5529 if (parser->queue_type == QUEUE_TYPE_INT)
5530 return gaudi_parse_cb_no_ext_queue(hdev, parser);
5531
5532 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
5533 return gaudi_parse_cb_mmu(hdev, parser);
5534 else
5535 return gaudi_parse_cb_no_mmu(hdev, parser);
5536 }
5537
/*
 * gaudi_add_end_of_cb_packets() - append completion packets to a CB.
 * @hdev: habanalabs device structure.
 * @kernel_address: kernel mapping of the CB.
 * @len: total CB length including the two trailing MSG_PROT packets.
 * @original_len: length of the user part of the CB.
 * @cq_addr: completion queue address to write to.
 * @cq_val: value to write to the completion queue.
 * @msi_vec: MSI vector to trigger (used only when there is no PCI device).
 * @eb: true to set the engine barrier on the first MSG_PROT packet.
 */
static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address,
				u32 len, u32 original_len, u64 cq_addr, u32 cq_val,
				u32 msi_vec, bool eb)
{
	struct packet_msg_prot *cq_pkt;
	struct packet_nop *cq_padding;
	u64 msi_addr;
	u32 tmp;

	/* Fill the gap between the user payload and the two trailing
	 * MSG_PROT packets with NOP packets.
	 */
	cq_padding = kernel_address + original_len;
	cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2);

	while ((void *)cq_padding < (void *)cq_pkt) {
		cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP));
		cq_padding++;
	}

	/* First MSG_PROT: write cq_val to the completion queue */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	if (eb)
		tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);

	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(cq_val);
	cq_pkt->addr = cpu_to_le64(cq_addr);

	cq_pkt++;

	/* Second MSG_PROT: trigger the interrupt by writing 1 to the MSI
	 * request register (or the indirect MSI register when running
	 * without a PCI device).
	 */
	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	cq_pkt->ctl = cpu_to_le32(tmp);
	cq_pkt->value = cpu_to_le32(1);
	msi_addr = hdev->pdev ? mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4;
	cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr);
}
5574
/* Publish the event queue consumer index to the CPU-IF register */
static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
{
	WREG32(mmCPU_IF_EQ_RD_OFFS, val);
}
5579
/*
 * gaudi_memset_device_memory() - fill a device memory area via DMA channel 0.
 * @hdev: habanalabs device structure.
 * @addr: device address to start filling at.
 * @size: number of bytes to fill.
 * @val: fill value (LIN_DMA memset source).
 *
 * Builds a single LIN_DMA packet with the memset flag set and runs it as a
 * kernel job on QMAN0. Return: 0 on success, negative errno otherwise.
 */
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val)
{
	struct packet_lin_dma *lin_dma_pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl, err_cause;
	struct hl_cb *cb;
	u64 cb_handle;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	/* Save the handle now so we don't touch cb after hl_cb_put() */
	cb_handle = cb->buf->handle;

	lin_dma_pkt = cb->kernel_address;
	memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
	cb_size = sizeof(*lin_dma_pkt);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);

	lin_dma_pkt->ctl = cpu_to_le32(ctl);
	lin_dma_pkt->src_addr = cpu_to_le64(val);
	/* packet was just zeroed, so a plain assignment (not OR) is correct */
	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
	lin_dma_pkt->tsize = cpu_to_le32(size);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	/* Verify DMA is OK before submitting; stale errors during init are
	 * expected and only cleared, not reported.
	 */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause && !hdev->init_done) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	/* leave room for the fence MSG_PROT added by the QMAN0 send path */
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

	/* Verify DMA is OK after the job completed */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		rc = -EIO;
		if (!hdev->init_done) {
			dev_dbg(hdev->dev,
				"Clearing DMA0 engine from errors (cause 0x%x)\n",
				err_cause);
			WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause);
		}
	}

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return rc;
}
5658
/*
 * gaudi_memset_registers() - write @val to @num_regs consecutive registers.
 * @hdev: habanalabs device structure.
 * @reg_base: address of the first register.
 * @num_regs: number of consecutive 4-byte registers to write.
 * @val: value to write to each register.
 *
 * Builds a CB of MSG_LONG packets (one per register) and runs it as a
 * kernel job on QMAN0. Return: 0 on success, negative errno otherwise.
 */
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val)
{
	struct packet_msg_long *pkt;
	struct hl_cs_job *job;
	u32 cb_size, ctl;
	struct hl_cb *cb;
	u64 cb_handle;
	int i, rc;

	/* one MSG_LONG per register plus the trailing fence MSG_PROT */
	cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);

	if (cb_size > SZ_2M) {
		/* report the limit in MB, not raw bytes */
		dev_err(hdev->dev, "CB size must be smaller than %uMB",
			SZ_2M >> 20);
		return -ENOMEM;
	}

	cb = hl_cb_kernel_create(hdev, cb_size, false);
	if (!cb)
		return -EFAULT;

	/* Save the handle now so we don't touch cb after hl_cb_put() */
	cb_handle = cb->buf->handle;

	pkt = cb->kernel_address;

	ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	for (i = 0; i < num_regs ; i++, pkt++) {
		pkt->ctl = cpu_to_le32(ctl);
		pkt->value = cpu_to_le32(val);
		pkt->addr = cpu_to_le64(reg_base + (i * 4));
	}

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = cb_size;

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb_handle);

	return rc;
}
5721
gaudi_restore_sm_registers(struct hl_device * hdev)5722 static int gaudi_restore_sm_registers(struct hl_device *hdev)
5723 {
5724 u64 base_addr;
5725 u32 num_regs;
5726 int rc;
5727
5728 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5729 num_regs = NUM_OF_SOB_IN_BLOCK;
5730 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5731 if (rc) {
5732 dev_err(hdev->dev, "failed resetting SM registers");
5733 return -ENOMEM;
5734 }
5735
5736 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
5737 num_regs = NUM_OF_SOB_IN_BLOCK;
5738 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5739 if (rc) {
5740 dev_err(hdev->dev, "failed resetting SM registers");
5741 return -ENOMEM;
5742 }
5743
5744 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5745 num_regs = NUM_OF_SOB_IN_BLOCK;
5746 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5747 if (rc) {
5748 dev_err(hdev->dev, "failed resetting SM registers");
5749 return -ENOMEM;
5750 }
5751
5752 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5753 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5754 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5755 if (rc) {
5756 dev_err(hdev->dev, "failed resetting SM registers");
5757 return -ENOMEM;
5758 }
5759
5760 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
5761 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5762 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5763 if (rc) {
5764 dev_err(hdev->dev, "failed resetting SM registers");
5765 return -ENOMEM;
5766 }
5767
5768 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
5769 num_regs = NUM_OF_MONITORS_IN_BLOCK;
5770 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5771 if (rc) {
5772 dev_err(hdev->dev, "failed resetting SM registers");
5773 return -ENOMEM;
5774 }
5775
5776 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5777 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
5778 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
5779 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5780 if (rc) {
5781 dev_err(hdev->dev, "failed resetting SM registers");
5782 return -ENOMEM;
5783 }
5784
5785 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
5786 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
5787 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
5788 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
5789 if (rc) {
5790 dev_err(hdev->dev, "failed resetting SM registers");
5791 return -ENOMEM;
5792 }
5793
5794 return 0;
5795 }
5796
gaudi_restore_dma_registers(struct hl_device * hdev)5797 static void gaudi_restore_dma_registers(struct hl_device *hdev)
5798 {
5799 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
5800 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
5801 int i;
5802
5803 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5804 u64 sob_addr = CFG_BASE +
5805 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
5806 (i * sob_delta);
5807 u32 dma_offset = i * DMA_CORE_OFFSET;
5808
5809 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
5810 lower_32_bits(sob_addr));
5811 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
5812 upper_32_bits(sob_addr));
5813 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
5814
5815 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
5816 * modified by the user for SRAM reduction
5817 */
5818 if (i > 1)
5819 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
5820 0x00000001);
5821 }
5822 }
5823
gaudi_restore_qm_registers(struct hl_device * hdev)5824 static void gaudi_restore_qm_registers(struct hl_device *hdev)
5825 {
5826 u32 qman_offset;
5827 int i;
5828
5829 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
5830 qman_offset = i * DMA_QMAN_OFFSET;
5831 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
5832 }
5833
5834 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
5835 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
5836 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
5837 }
5838
5839 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
5840 qman_offset = i * TPC_QMAN_OFFSET;
5841 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
5842 }
5843
5844 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
5845 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET +
5846 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET;
5847 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0);
5848 }
5849 }
5850
/*
 * Restore all user-modifiable registers (sync manager, DMA core, QMAN
 * arbitration) to their default state. Only the SM restore can fail.
 */
static int gaudi_restore_user_registers(struct hl_device *hdev)
{
	int rc = gaudi_restore_sm_registers(hdev);

	if (rc)
		return rc;

	gaudi_restore_dma_registers(hdev);
	gaudi_restore_qm_registers(hdev);

	return 0;
}
5864
/* No per-context work is required on Gaudi; always succeeds */
static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
{
	return 0;
}
5869
/*
 * Zero the MMU page-table area (plus the cache management area) in device
 * memory. No-op when the MMU capability is not initialized.
 */
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
{
	u32 size = hdev->asic_prop.mmu_pgt_size +
			hdev->asic_prop.mmu_cache_mng_size;
	struct gaudi_device *gaudi = hdev->asic_specific;
	u64 addr = hdev->asic_prop.mmu_pgt_addr;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	return gaudi_memset_device_memory(hdev, addr, size, 0);
}
5882
/* Intentionally empty - Gaudi has no phase topology to restore */
static void gaudi_restore_phase_topology(struct hl_device *hdev)
{

}
5887
/*
 * gaudi_dma_core_transfer() - run one DMA-core copy and wait for completion.
 * @hdev: habanalabs device structure.
 * @dma_id: index of the DMA channel to use.
 * @addr: device source address.
 * @size_to_dma: number of bytes to copy.
 * @dma_addr: host destination bus address.
 *
 * Programs the DMA core registers directly (bypassing the QMAN) and polls
 * the busy bit for up to 1 second. Return: 0 on success, -EIO on timeout
 * or if the engine reports an error cause.
 */
static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr,
					u32 size_to_dma, dma_addr_t dma_addr)
{
	u32 err_cause, val;
	u64 dma_offset;
	int rc;

	dma_offset = dma_id * DMA_CORE_OFFSET;

	/* Program src/dst/size, then kick the transfer with the commit bit */
	WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr));
	WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr));
	WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr));
	WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma);
	WREG32(mmDMA0_CORE_COMMIT + dma_offset,
			(1 << DMA0_CORE_COMMIT_LIN_SHIFT));

	/* Busy-wait until the DMA core clears its busy status (1s cap) */
	rc = hl_poll_timeout(
		hdev,
		mmDMA0_CORE_STS0 + dma_offset,
		val,
		((val & DMA0_CORE_STS0_BUSY_MASK) == 0),
		0,
		1000000);

	if (rc) {
		dev_err(hdev->dev,
			"DMA %d timed-out during reading of 0x%llx\n",
			dma_id, addr);
		return -EIO;
	}

	/* Verify DMA is OK; clear any reported error cause before failing */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause);
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);

		return -EIO;
	}

	return 0;
}
5934
/*
 * gaudi_debugfs_read_dma() - read device memory into a host buffer via DMA.
 * @hdev: habanalabs device structure.
 * @addr: device address to read from.
 * @size: number of bytes to read.
 * @blob_addr: host (kernel) buffer to copy the data into.
 *
 * Picks an idle PCI DMA channel, stops its QMAN CPs, and copies the data
 * in 2MB chunks through a coherent bounce buffer. Returns -EAGAIN if both
 * PCI DMA channels are busy. Called from debugfs, so contention is handled
 * by simply asking the user to retry.
 */
static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size,
				void *blob_addr)
{
	u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma;
	u32 qm_glbl_sts0, qm_cgm_sts;
	u64 dma_offset, qm_offset;
	dma_addr_t dma_addr;
	void *kernel_addr;
	bool is_eng_idle;
	int rc = 0, dma_id;

	/* 2MB coherent bounce buffer, reused for every chunk */
	kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO);

	if (!kernel_addr)
		return -ENOMEM;

	hdev->asic_funcs->hw_queues_lock(hdev);

	/* Try PCI DMA channel 1 first; fall back to channel 2 if busy */
	dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
	dma_offset = dma_id * DMA_CORE_OFFSET;
	qm_offset = dma_id * DMA_QMAN_OFFSET;
	dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
	qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
	qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
	is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
		      IS_DMA_IDLE(dma_core_sts0);

	if (!is_eng_idle) {
		dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
		dma_offset = dma_id * DMA_CORE_OFFSET;
		qm_offset = dma_id * DMA_QMAN_OFFSET;
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset);
		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
			      IS_DMA_IDLE(dma_core_sts0);

		if (!is_eng_idle) {
			dev_err_ratelimited(hdev->dev,
				"Can't read via DMA because it is BUSY\n");
			rc = -EAGAIN;
			goto out;
		}
	}

	/* Stop the QMAN CPs so nothing else is submitted to this channel;
	 * the previous CFG1 value is restored at the end.
	 */
	cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset);
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset,
			0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Verify DMA is OK; stale error causes are cleared before use */
	err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	if (err_cause) {
		dev_dbg(hdev->dev,
			"Clearing DMA0 engine from errors (cause 0x%x)\n",
			err_cause);
		WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause);
	}

	pos = 0;
	size_left = size;
	size_to_dma = SZ_2M;

	/* Copy in 2MB chunks: DMA into the bounce buffer, then memcpy out */
	while (size_left > 0) {

		if (size_left < SZ_2M)
			size_to_dma = size_left;

		rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma,
						dma_addr);
		if (rc)
			break;

		memcpy(blob_addr + pos, kernel_addr, size_to_dma);

		if (size_left <= SZ_2M)
			break;

		pos += SZ_2M;
		addr += SZ_2M;
		size_left -= SZ_2M;
	}

	/* TODO: remove this by mapping the DMA temporary buffer to the MMU
	 * using the compute ctx ASID, if exists. If not, use the kernel ctx
	 * ASID
	 */
	WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
			~BIT(DMA0_CORE_PROT_VAL_SHIFT));

	/* Re-enable the QMAN CPs we stopped above */
	WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1);

out:
	hdev->asic_funcs->hw_queues_unlock(hdev);

	hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr);

	return rc;
}
6039
/*
 * Read a PTE from device memory through the HBM BAR. Assumes the BAR is
 * currently positioned so that @addr falls inside it (the offset within
 * the BAR is addr - hbm_bar_cur_addr). Returns U64_MAX while a hard reset
 * is pending, since the BAR must not be accessed then.
 */
static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	return readq(hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}
6050
/*
 * Write a PTE to device memory through the HBM BAR. Same BAR positioning
 * assumption as gaudi_read_pte(); silently skipped while a hard reset is
 * pending.
 */
static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
			(addr - gaudi->hbm_bar_cur_addr));
}
6061
/*
 * Program @asid into a single engine register via read-modify-write:
 * first clear bits [10:0] (the MMBP and ASID fields), then OR in the new
 * ASID, leaving MMBP cleared so the access goes through the MMU.
 */
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
{
	/* mask to zero the MMBP and ASID bits */
	WREG32_AND(reg, ~0x7FF);
	WREG32_OR(reg, asid);
}
6068
/*
 * gaudi_mmu_prepare() - program @asid into every engine that issues MMU
 * transactions: all DMA QMANs and cores, TPC QMANs and config blocks, MME
 * QMANs/SBAB/ACC, the NIC QMANs whose capability bit is set, and the PSOC
 * trace unit. No-op when the MMU capability is not initialized; the ASID
 * must fit the QM NON_SECURE_PROPS ASID field.
 */
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
		dev_crit(hdev->dev, "asid %u is too big\n", asid);
		return;
	}

	/* DMA QMANs: 5 NON_SECURE_PROPS registers per channel */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);

	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* DMA cores */
	gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
	gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);

	/* TPC QMANs + TPC config read/write user registers */
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);

	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
	gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);

	/* MME master QMANs (MME0 and MME2 only) */
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);

	/* MME SBAB read users and ACC write-back users, all four MMEs */
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
	gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);

	/* NIC QMANs: only those whose capability bit was initialized */
	if (gaudi->hw_cap_initialized & HW_CAP_NIC0) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC1) {
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC2) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC3) {
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC4) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC5) {
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC6) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC7) {
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC8) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	if (gaudi->hw_cap_initialized & HW_CAP_NIC9) {
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3,
				asid);
		gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4,
				asid);
	}

	/* PSOC trace unit */
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
	gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
}
6359
/*
 * gaudi_send_job_on_qman0() - synchronously run a kernel job on QMAN0.
 * @hdev: habanalabs device structure.
 * @job: job whose patched CB ends with room for one fence MSG_PROT packet.
 *
 * Appends a fence MSG_PROT to the CB that writes GAUDI_QMAN0_FENCE_VAL to
 * a host fence allocation, sends the CB on GAUDI_QUEUE_ID_DMA_0_0 and
 * polls the fence until it is written or the timeout expires.
 *
 * Return: 0 on success, -ENOMEM on fence allocation failure, -ETIMEDOUT
 * or the queue-send error code otherwise.
 */
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job)
{
	struct packet_msg_prot *fence_pkt;
	u32 *fence_ptr;
	dma_addr_t fence_dma_addr;
	struct hl_cb *cb;
	u32 tmp, timeout, dma_offset;
	int rc;

	/* Palladium emulation needs a much longer timeout */
	if (hdev->pldm)
		timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
	else
		timeout = HL_DEVICE_TIMEOUT_USEC;

	fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr);
	if (!fence_ptr) {
		dev_err(hdev->dev,
			"Failed to allocate fence memory for QMAN0\n");
		return -ENOMEM;
	}

	cb = job->patched_cb;

	/* Build the fence packet in the space reserved at the CB's end */
	fence_pkt = cb->kernel_address +
			job->job_cb_size - sizeof(struct packet_msg_prot);

	tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
	tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	fence_pkt->ctl = cpu_to_le32(tmp);
	fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
	fence_pkt->addr = cpu_to_le64(fence_dma_addr);

	dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;

	/* Set the PROT VAL bit for the duration of the job; the ERR_VAL-only
	 * setting is restored below on all exit paths.
	 */
	WREG32(mmDMA0_CORE_PROT + dma_offset,
			BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));

	rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
					job->job_cb_size, cb->bus_address);
	if (rc) {
		dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
		goto free_fence_ptr;
	}

	/* Wait (1ms polling interval) for the fence value to be written */
	rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
				(tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
				timeout, true);

	hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);

	if (rc == -ETIMEDOUT) {
		dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
		goto free_fence_ptr;
	}

free_fence_ptr:
	WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));

	hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr);
	return rc;
}
6424
/*
 * gaudi_get_event_desc - copy the human-readable name of an event type into
 * @desc (at most @size bytes, NUL-terminated by snprintf).
 *
 * Falls back to "N/A" for out-of-range event IDs or IDs not marked valid in
 * the IRQ map table.
 */
static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
{
	if (event_type >= GAUDI_EVENT_SIZE)
		goto event_not_supported;

	if (!gaudi_irq_map_table[event_type].valid)
		goto event_not_supported;

	/*
	 * Use an explicit "%s" format instead of passing the table string as
	 * the format itself - a name containing '%' would otherwise be
	 * interpreted as format directives (-Wformat-security).
	 */
	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);

	return;

event_not_supported:
	snprintf(desc, size, "N/A");
}
6440
/*
 * gaudi_get_razwi_initiator_dma_name - resolve which DMA engine(s) caused a
 * RAZWI event, given the initiator's X/Y router location.
 *
 * Each DMA_IF router location is shared by two DMA cores, so the per-core
 * ERR_CAUSE registers are consulted to disambiguate: if exactly one of the
 * two candidates has the relevant HBW read/write error bit set, that core is
 * reported via @engine_id_1; otherwise both candidates are returned via
 * @engine_id_1 and @engine_id_2.
 *
 * Returns a printable name of the initiator ("DMAn", "DMAn or DMAm", or
 * "unknown initiator" for an unrecognized location).
 */
static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
						bool is_write, u16 *engine_id_1,
						u16 *engine_id_2)
{
	u32 dma_id[2], dma_offset, err_cause[2], mask, i;

	/* Select the HBW write or read error bit per the failing direction */
	mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
			DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;

	/* Map the router location to its pair of candidate DMA cores */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		dma_id[0] = 0;
		dma_id[1] = 2;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		dma_id[0] = 1;
		dma_id[1] = 3;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		dma_id[0] = 4;
		dma_id[1] = 6;
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		dma_id[0] = 5;
		dma_id[1] = 7;
		break;
	default:
		goto unknown_initiator;
	}

	/* Read both candidates' error-cause registers */
	for (i = 0 ; i < 2 ; i++) {
		dma_offset = dma_id[i] * DMA_CORE_OFFSET;
		err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
	}

	/*
	 * Disambiguate: report the single core whose error bit is set, or
	 * both candidates when the cause registers are inconclusive.
	 */
	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			return "DMA0";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA2";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_0;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_2;
			return "DMA0 or DMA2";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			return "DMA1";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA3";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_1;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_3;
			return "DMA1 or DMA3";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			return "DMA4";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA6";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_4;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_6;
			return "DMA4 or DMA6";
		}
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		if ((err_cause[0] & mask) && !(err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			return "DMA5";
		} else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA7";
		} else {
			*engine_id_1 = GAUDI_ENGINE_ID_DMA_5;
			*engine_id_2 = GAUDI_ENGINE_ID_DMA_7;
			return "DMA5 or DMA7";
		}
	}

unknown_initiator:
	return "unknown initiator";
}
6538
/*
 * gaudi_get_razwi_initiator_name - decode the RAZWI initiator capture
 * register into a printable engine name.
 *
 * The captured value encodes the initiator's router X/Y location plus an AXI
 * ID that disambiguates blocks which share a router (e.g. TPC vs. NIC, or
 * TPC3 vs. PCI/CPU/PSOC). When resolvable, the engine ID(s) are returned via
 * @engine_id_1 (and @engine_id_2 for ambiguous DMA pairs). DMA initiators
 * are further resolved by gaudi_get_razwi_initiator_dma_name().
 */
static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
						u16 *engine_id_1, u16 *engine_id_2)
{
	u32 val, x_y, axi_id;

	/* Read the write- or read-side capture register per failing direction */
	val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
				RREG32(mmMMU_UP_RAZWI_READ_ID);
	x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
			(RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
	axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
			RAZWI_INITIATOR_AXI_ID_SHIFT);

	switch (x_y) {
	case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_0;
			return "TPC0";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_0;
			return "NIC0";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC1:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_1;
		return "TPC1";
	case RAZWI_INITIATOR_ID_X_Y_MME0_0:
	case RAZWI_INITIATOR_ID_X_Y_MME0_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_0;
		return "MME0";
	case RAZWI_INITIATOR_ID_X_Y_MME1_0:
	case RAZWI_INITIATOR_ID_X_Y_MME1_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_1;
		return "MME1";
	case RAZWI_INITIATOR_ID_X_Y_TPC2:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_2;
		return "TPC2";
	case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_3;
			return "TPC3";
		}
		/* PCI, CPU or PSOC does not have engine id*/
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
			return "PCI";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
			return "CPU";
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
			return "PSOC";
		break;
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
	case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
		/* DMA routers are shared per pair; resolve via ERR_CAUSE regs */
		return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write,
				engine_id_1, engine_id_2);
	case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_4;
			return "TPC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_1;
			return "NIC1";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_2;
			return "NIC2";
		}
		break;
	case RAZWI_INITIATOR_ID_X_Y_TPC5:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_5;
		return "TPC5";
	case RAZWI_INITIATOR_ID_X_Y_MME2_0:
	case RAZWI_INITIATOR_ID_X_Y_MME2_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_2;
		return "MME2";
	case RAZWI_INITIATOR_ID_X_Y_MME3_0:
	case RAZWI_INITIATOR_ID_X_Y_MME3_1:
		*engine_id_1 = GAUDI_ENGINE_ID_MME_3;
		return "MME3";
	case RAZWI_INITIATOR_ID_X_Y_TPC6:
		*engine_id_1 = GAUDI_ENGINE_ID_TPC_6;
		return "TPC6";
	case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_TPC_7;
			return "TPC7";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_4;
			return "NIC4";
		}
		if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) {
			*engine_id_1 = GAUDI_ENGINE_ID_NIC_5;
			return "NIC5";
		}
		break;
	default:
		break;
	}

	/* Unrecognized location/AXI combination - dump the raw fields */
	dev_err(hdev->dev,
		"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
		val,
		(val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
		(val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
		(val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
			RAZWI_INITIATOR_AXI_ID_MASK);

	return "unknown initiator";
}
6655
/*
 * gaudi_print_and_get_razwi_info - check the MMU RAZWI capture valid bits and
 * report any illegal write/read transaction.
 *
 * Each valid bit is cleared (write 0) after reporting so the HW can capture
 * the next event. *is_write / *is_read are set for the directions that were
 * captured; the initiator engine IDs, when resolvable, are returned through
 * @engine_id_1 / @engine_id_2.
 */
static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
						u16 *engine_id_2, bool *is_read, bool *is_write)
{

	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal write of %s\n",
			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
		/* Clear the valid bit to re-arm capture */
		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
		*is_write = true;
	}

	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
		dev_err_ratelimited(hdev->dev,
			"RAZWI event caused by illegal read of %s\n",
			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
		/* Clear the valid bit to re-arm capture */
		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
		*is_read = true;
	}
}
6676
/*
 * gaudi_print_and_get_mmu_error_info - report captured MMU page-fault and
 * access-error events, if the MMU is initialized.
 *
 * The faulting VA is reconstructed from the capture registers: VA bits 49:32
 * come from the capture register itself, the low 32 bits from the matching
 * _VA register. The reconstructed address is returned via @addr and the
 * capture register is cleared after reporting. Page faults are additionally
 * recorded via hl_handle_page_fault() with @event_mask.
 */
static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 val;

	/* Nothing to read if the MMU was never brought up */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
	if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);

		/* Clear the capture so the next fault can be latched */
		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
	}

	val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
	if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
		*addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
		*addr <<= 32;
		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);

		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);

		/* Clear the capture so the next error can be latched */
		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
	}
}
6708
6709 /*
6710 * +-------------------+------------------------------------------------------+
6711 * | Configuration Reg | Description |
6712 * | Address | |
6713 * +-------------------+------------------------------------------------------+
6714 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
6715 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
6716 * | |0xF34 memory wrappers 63:32 |
6717 * | |0xF38 memory wrappers 95:64 |
6718 * | |0xF3C memory wrappers 127:96 |
6719 * +-------------------+------------------------------------------------------+
6720 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
6721 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
6722 * | |0xF44 memory wrappers 63:32 |
6723 * | |0xF48 memory wrappers 95:64 |
6724 * | |0xF4C memory wrappers 127:96 |
6725 * +-------------------+------------------------------------------------------+
6726 */
/**
 * gaudi_extract_ecc_info - extract ECC error information from a block's
 * ECC registers.
 * @hdev: habanalabs device structure
 * @params: block address, number of memory wrappers, SERR/DERR selection
 * @ecc_address: returned ECC error address
 * @ecc_syndrom: returned ECC error syndrome
 * @memory_wrapper_idx: returned index of the failing memory wrapper
 *
 * Scans the SERR/DERR indication registers (one bit per memory wrapper,
 * 32 wrappers per consecutive 32-bit register - see the register map above),
 * selects the failing wrapper, reads its address/syndrome and clears the
 * error indication.
 *
 * Return: 0 on success, -EINVAL if no error bit was found.
 */
static int gaudi_extract_ecc_info(struct hl_device *hdev,
		struct ecc_info_extract_params *params, u64 *ecc_address,
		u64 *ecc_syndrom, u8 *memory_wrapper_idx)
{
	u32 i, num_mem_regs, reg, err_bit;
	u64 err_addr, err_word = 0;

	/* One 32-bit indication register covers 32 memory wrappers */
	num_mem_regs = params->num_memories / 32 +
			((params->num_memories % 32) ? 1 : 0);

	if (params->block_address >= CFG_BASE)
		params->block_address -= CFG_BASE;

	if (params->derr)
		err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET;
	else
		err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET;

	/* Set invalid wrapper index */
	*memory_wrapper_idx = 0xFF;

	/*
	 * Iterate through memory wrappers, a single bit must be set.
	 * The indication registers are consecutive 32-bit words, so register
	 * i lives at err_addr + i * 4. The previous code accumulated the
	 * offset ("err_addr += i * 4"), visiting base, +4, +12, +24 and
	 * thereby skipping the third register and reading past the last.
	 */
	for (i = 0 ; i < num_mem_regs ; i++) {
		err_word = RREG32(err_addr + i * 4);
		if (err_word) {
			err_bit = __ffs(err_word);
			*memory_wrapper_idx = err_bit + (32 * i);
			break;
		}
	}

	if (*memory_wrapper_idx == 0xFF) {
		dev_err(hdev->dev, "ECC error information cannot be found\n");
		return -EINVAL;
	}

	/* Select the failing wrapper before reading its address/syndrome */
	WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET,
		*memory_wrapper_idx);

	*ecc_address =
		RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET);
	*ecc_syndrom =
		RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET);

	/* Clear error indication */
	reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET);
	if (params->derr)
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1);
	else
		reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1);

	WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg);

	return 0;
}
6783
6784 /*
6785 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap
6786 *
6787 * @idx: the current pi/ci value
6788 * @q_len: the queue length (power of 2)
6789 *
6790 * @return the cyclically decremented index
6791 */
static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len)
{
	/*
	 * Adding (q_len - 1) and masking by (q_len - 1) is a modular
	 * decrement that wraps into [0, q_len - 1]; q_len is a power of 2.
	 */
	return (idx + q_len - 1) & (q_len - 1);
}
6803
6804 /**
6805 * gaudi_handle_sw_config_stream_data - print SW config stream data
6806 *
6807 * @hdev: pointer to the habanalabs device structure
6808 * @stream: the QMAN's stream
6809 * @qman_base: base address of QMAN registers block
6810 * @event_mask: mask of the last events occurred
6811 */
static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream,
						u64 qman_base, u64 event_mask)
{
	u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr;
	u32 cq_ptr_lo_off, size;

	/* Distance between consecutive per-stream CQ_PTR_LO registers */
	cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0;

	/*
	 * Compute this stream's CQ_PTR_LO/HI and CQ_TSIZE register addresses
	 * relative to the given QMAN base (register layout is common to all
	 * QMANs, hence the TPC0 offsets).
	 */
	cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) +
					stream * cq_ptr_lo_off;
	cq_ptr_hi = cq_ptr_lo +
			(mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0);
	cq_tsize = cq_ptr_lo +
			(mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0);

	cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
	size = RREG32(cq_tsize);
	dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
		stream, cq_ptr, size);

	/* On an undefined-opcode event, also capture the data for user-space */
	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr;
		hdev->captured_err_info.undef_opcode.cq_size = size;
		hdev->captured_err_info.undef_opcode.stream_id = stream;
	}
}
6838
6839 /**
6840 * gaudi_handle_last_pqes_on_err - print last PQEs on error
6841 *
6842 * @hdev: pointer to the habanalabs device structure
6843 * @qid_base: first QID of the QMAN (out of 4 streams)
6844 * @stream: the QMAN's stream
6845 * @qman_base: base address of QMAN registers block
6846 * @event_mask: mask of the last events occurred
6847 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE)
6848 */
static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
						u32 stream, u64 qman_base,
						u64 event_mask,
						bool pr_sw_conf)
{
	u32 ci, qm_ci_stream_off, queue_len;
	struct hl_hw_queue *q;
	u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE];
	int i;

	q = &hdev->kernel_queues[qid_base + stream];

	/* Address of this stream's PQ_CI register within the QMAN block */
	qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0;
	pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) +
						stream * qm_ci_stream_off;

	queue_len = (q->queue_type == QUEUE_TYPE_INT) ?
			q->int_queue_len : HL_QUEUE_LENGTH;

	/* Lock so the PQ contents cannot change while being dumped */
	hdev->asic_funcs->hw_queues_lock(hdev);

	if (pr_sw_conf)
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask);

	ci = RREG32(pq_ci);

	/* we should start printing form ci -1 */
	ci = gaudi_queue_idx_dec(ci, queue_len);
	memset(addr, 0, sizeof(addr));

	/* Walk backwards over the last PQ_FETCHER_CACHE_SIZE descriptors */
	for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) {
		struct hl_bd *bd;
		u32 len;

		bd = q->kernel_address;
		bd += ci;

		len = le32_to_cpu(bd->len);
		/* len 0 means uninitialized entry- break */
		if (!len)
			break;

		addr[i] = le64_to_cpu(bd->ptr);

		dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
			stream, ci, addr[i], len);

		/* get previous ci, wrap if needed */
		ci = gaudi_queue_idx_dec(ci, queue_len);
	}

	if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) {
		struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
		u32 arr_idx = undef_opcode->cb_addr_streams_len;

		/* First stream recorded for this event - stamp time and engine */
		if (arr_idx == 0) {
			undef_opcode->timestamp = ktime_get();
			undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base];
		}

		/* NOTE(review): no visible bound-check of arr_idx against the
		 * cb_addr_streams capacity here - presumably limited by the
		 * caller invoking this at most QMAN_STREAMS times per event;
		 * verify against handle_qman_data_on_err().
		 */
		memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr));
		undef_opcode->cb_addr_streams_len++;
	}

	hdev->asic_funcs->hw_queues_unlock(hdev);
}
6915
6916 /**
6917 * handle_qman_data_on_err - extract QMAN data on error
6918 *
6919 * @hdev: pointer to the habanalabs device structure
6920 * @qid_base: first QID of the QMAN (out of 4 streams)
6921 * @stream: the QMAN's stream
6922 * @qman_base: base address of QMAN registers block
6923 * @event_mask: mask of the last events occurred
6924 *
6925 * This function attempt to exatract as much data as possible on QMAN error.
6926 * On upper CP print the SW config stream data and last 8 PQEs.
6927 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs
6928 */
static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base,
					u32 stream, u64 qman_base, u64 event_mask)
{
	u32 s;

	if (stream == QMAN_STREAMS) {
		/*
		 * Lower CP: print its SW config stream data, then the last
		 * PQEs of all 4 upper CPs.
		 */
		gaudi_handle_sw_config_stream_data(hdev, stream, qman_base,
							event_mask);

		for (s = 0; s < QMAN_STREAMS; s++)
			gaudi_handle_last_pqes_on_err(hdev, qid_base, s,
							qman_base, event_mask,
							false);
		return;
	}

	/* Upper CP: SW config data is printed along with the last PQEs */
	gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, qman_base,
					event_mask, true);
}
6947
/*
 * gaudi_handle_qman_err_generic - decode and report a QMAN's error causes.
 *
 * Scans GLBL_STS1 of the 4 upper-CP streams plus the lower CP and prints
 * every set error-cause bit, then reports ARB errors. When stop_on_err is
 * not set, the handled status bits are cleared (write-1-clear); otherwise
 * the QMAN debug data (SW config / last PQEs) is dumped instead. On an
 * undefined CP opcode (while capture is write-enabled), the capture struct
 * is reset and HL_NOTIFIER_EVENT_UNDEFINED_OPCODE is added to @event_mask.
 */
static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
					const char *qm_name,
					u64 qman_base,
					u32 qid_base,
					u64 *event_mask)
{
	u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
	u64 glbl_sts_addr, arb_err_addr;
	char reg_desc[32];

	/* QMAN register layout is common to all engines - use TPC0 offsets */
	glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE);
	arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE);

	/* Iterate through all stream GLBL_STS1 registers + Lower CP */
	for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
		glbl_sts_clr_val = 0;
		glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);

		if (!glbl_sts_val)
			continue;

		/* Index QMAN_STREAMS (i.e. the 5th entry) is the lower CP */
		if (i == QMAN_STREAMS)
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
		else
			snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);

		for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
			if (glbl_sts_val & BIT(j)) {
				dev_err_ratelimited(hdev->dev,
						"%s %s. err cause: %s\n",
						qm_name, reg_desc,
						gaudi_qman_error_cause[j]);
				glbl_sts_clr_val |= BIT(j);
			}
		}
		/* check for undefined opcode */
		if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK &&
				hdev->captured_err_info.undef_opcode.write_enable) {
			/* Reset the capture struct; it will be refilled by the
			 * per-stream handlers. write_enable stays false so only
			 * the first undefined opcode is captured.
			 */
			memset(&hdev->captured_err_info.undef_opcode, 0,
						sizeof(hdev->captured_err_info.undef_opcode));

			hdev->captured_err_info.undef_opcode.write_enable = false;
			*event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
		}

		/* Write 1 clear errors */
		if (!hdev->stop_on_err)
			WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
		else
			handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask);
	}

	arb_err_val = RREG32(arb_err_addr);

	if (!arb_err_val)
		return;

	/* Report every set arbitration error cause bit */
	for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
		if (arb_err_val & BIT(j)) {
			dev_err_ratelimited(hdev->dev,
					"%s ARB_ERR. err cause: %s\n",
					qm_name,
					gaudi_qman_arb_error_cause[j]);
		}
	}
}
7014
/*
 * gaudi_print_sm_sei_info - report a Sync Manager SEI (system error
 * interrupt) event.
 *
 * @event_type is relative to GAUDI_EVENT_DMA_IF_SEI_0; the index is
 * bit-flipped because the event enum ordering is opposite to the sync
 * manager naming order. sei_log carries cause-specific data (SOB group,
 * monitor address or AXI ID, per the cause).
 */
static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
		struct hl_eq_sm_sei_data *sei_data)
{
	u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;

	/* Flip the bits as the enum is ordered in the opposite way */
	index = (index ^ 0x3) & 0x3;

	switch (sei_data->sei_cause) {
	case SM_SEI_SO_OVERFLOW:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: SOB Group %u overflow/underflow",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_LBW_4B_UNALIGNED:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	case SM_SEI_AXI_RESPONSE_ERR:
		dev_err_ratelimited(hdev->dev,
			"%s SEI Error: AXI ID %u response error",
			gaudi_sync_manager_names[index],
			le32_to_cpu(sei_data->sei_log));
		break;
	default:
		dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
				le32_to_cpu(sei_data->sei_log));
		break;
	}
}
7048
/*
 * gaudi_handle_ecc_event - decode and report an ECC error event.
 *
 * When FW security is enabled (or the event type belongs to a FW-owned
 * block), the address/syndrome/wrapper info is taken from the EQ entry
 * (@ecc_data); otherwise it is extracted directly from the failing block's
 * ECC registers via gaudi_extract_ecc_info(), with the block base and
 * wrapper count selected per event type.
 *
 * Note: the call below previously read "¶ms" - a mis-encoded "&params"
 * (HTML-entity corruption) that could not compile; restored here.
 */
static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
		struct hl_eq_ecc_data *ecc_data)
{
	struct ecc_info_extract_params params;
	u64 ecc_address = 0, ecc_syndrom = 0;
	u8 index, memory_wrapper_idx = 0;
	bool extract_info_from_fw;
	int rc;

	/* Driver has no register access when FW security is enabled */
	if (hdev->asic_prop.fw_security_enabled) {
		extract_info_from_fw = true;
		goto extract_ecc_info;
	}

	switch (event_type) {
	case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
		extract_info_from_fw = true;
		break;
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
		index = event_type - GAUDI_EVENT_TPC0_SERR;
		params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
		index = event_type - GAUDI_EVENT_TPC0_DERR;
		params.block_address =
			mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
		params.num_memories = 90;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
		/* MME events are spaced 4 apart per engine */
		index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
		index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
		params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 128;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = false;
		extract_info_from_fw = false;
		break;
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
		index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
		params.block_address =
			mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
		params.num_memories = 33;
		params.derr = true;
		extract_info_from_fw = false;
		break;
	default:
		return;
	}

extract_ecc_info:
	if (extract_info_from_fw) {
		ecc_address = le64_to_cpu(ecc_data->ecc_address);
		ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
		memory_wrapper_idx = ecc_data->memory_wrapper_idx;
	} else {
		rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address,
				&ecc_syndrom, &memory_wrapper_idx);
		if (rc)
			return;
	}

	dev_err(hdev->dev,
		"ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n",
		ecc_address, ecc_syndrom, memory_wrapper_idx);
}
7145
/*
 * gaudi_handle_qman_err - route a QMAN error event to the generic handler.
 *
 * Maps the event type to the QMAN's register base address, the first queue
 * ID of its 4 streams and a printable name, then delegates to
 * gaudi_handle_qman_err_generic(). Unrecognized event types are ignored.
 */
static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	u64 qman_base;
	char desc[32];
	u32 qid_base;
	u8 index;

	switch (event_type) {
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		index = event_type - GAUDI_EVENT_TPC0_QM;
		qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS;
		qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
		break;
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
		if (event_type == GAUDI_EVENT_MME0_QM) {
			index = 0;
			qid_base = GAUDI_QUEUE_ID_MME_0_0;
		} else { /* event_type == GAUDI_EVENT_MME2_QM */
			index = 2;
			qid_base = GAUDI_QUEUE_ID_MME_1_0;
		}
		qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
		break;
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		index = event_type - GAUDI_EVENT_DMA0_QM;
		qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS;
		/* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */
		if (index > 1)
			qid_base++;
		qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET;
		snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
		break;
	case GAUDI_EVENT_NIC0_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_0_0;
		qman_base = mmNIC0_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0");
		break;
	case GAUDI_EVENT_NIC0_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_1_0;
		qman_base = mmNIC0_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1");
		break;
	case GAUDI_EVENT_NIC1_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_2_0;
		qman_base = mmNIC1_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0");
		break;
	case GAUDI_EVENT_NIC1_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_3_0;
		qman_base = mmNIC1_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1");
		break;
	case GAUDI_EVENT_NIC2_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_4_0;
		qman_base = mmNIC2_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0");
		break;
	case GAUDI_EVENT_NIC2_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_5_0;
		qman_base = mmNIC2_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1");
		break;
	case GAUDI_EVENT_NIC3_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_6_0;
		qman_base = mmNIC3_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0");
		break;
	case GAUDI_EVENT_NIC3_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_7_0;
		qman_base = mmNIC3_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1");
		break;
	case GAUDI_EVENT_NIC4_QM0:
		qid_base = GAUDI_QUEUE_ID_NIC_8_0;
		qman_base = mmNIC4_QM0_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0");
		break;
	case GAUDI_EVENT_NIC4_QM1:
		qid_base = GAUDI_QUEUE_ID_NIC_9_0;
		qman_base = mmNIC4_QM1_BASE;
		snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1");
		break;
	default:
		return;
	}

	gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask);
}
7236
/*
 * gaudi_print_irq_info - print a received H/W interrupt and, when
 * @check_razwi is set, collect and report the associated RAZWI and MMU
 * error information (initiator engines, direction, faulting address).
 */
static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
					bool check_razwi, u64 *event_mask)
{
	bool is_read = false, is_write = false;
	u16 engine_id[2], num_of_razwi_eng = 0;
	char desc[64] = "";
	u64 razwi_addr = 0;
	u8 razwi_flags = 0;

	/*
	 * Init engine id by default as not valid and only if razwi initiated from engine with
	 * engine id it will get valid value.
	 */
	engine_id[0] = HL_RAZWI_NA_ENG_ID;
	engine_id[1] = HL_RAZWI_NA_ENG_ID;

	gaudi_get_event_desc(event_type, desc, sizeof(desc));
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (check_razwi) {
		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
						&is_write);
		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);

		/* Translate the captured directions to RAZWI flag bits */
		if (is_read)
			razwi_flags |= HL_RAZWI_READ;
		if (is_write)
			razwi_flags |= HL_RAZWI_WRITE;

		/* Count how many engine IDs were actually resolved (0..2) */
		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
				num_of_razwi_eng = 2;
			else
				num_of_razwi_eng = 1;
		}

		/* Record the RAZWI only if a read or write was captured */
		if (razwi_flags)
			hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng,
					razwi_flags, event_mask);
	}
}
7279
gaudi_print_out_of_sync_info(struct hl_device * hdev,struct cpucp_pkt_sync_err * sync_err)7280 static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
7281 struct cpucp_pkt_sync_err *sync_err)
7282 {
7283 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
7284
7285 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
7286 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
7287 }
7288
gaudi_print_fw_alive_info(struct hl_device * hdev,struct hl_eq_fw_alive * fw_alive)7289 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
7290 struct hl_eq_fw_alive *fw_alive)
7291 {
7292 dev_err(hdev->dev,
7293 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
7294 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
7295 le32_to_cpu(fw_alive->process_id),
7296 le32_to_cpu(fw_alive->thread_id),
7297 le64_to_cpu(fw_alive->uptime_seconds));
7298 }
7299
/*
 * gaudi_print_nic_axi_irq_info() - decode and log a NIC SEI (AXI error) event.
 * @hdev: habanalabs device structure.
 * @event_type: one of GAUDI_EVENT_NIC_SEI_0..4; the offset from SEI_0 gives
 *              the NIC macro index.
 * @data: event payload, interpreted as struct eq_nic_sei_event.
 */
static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
					void *data)
{
	struct eq_nic_sei_event *sei = data;
	u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0;
	const char *cause_str;
	char desc[64] = "";

	/* Translate the AXI error cause code to its printable name */
	switch (sei->axi_error_cause) {
	case RXB:
		cause_str = "RXB";
		break;
	case RXE:
		cause_str = "RXE";
		break;
	case TXS:
		cause_str = "TXS";
		break;
	case TXE:
		cause_str = "TXE";
		break;
	case QPC_RESP:
		cause_str = "QPC_RESP";
		break;
	case NON_AXI_ERR:
		cause_str = "NON_AXI_ERR";
		break;
	case TMR:
		cause_str = "TMR";
		break;
	default:
		dev_err(hdev->dev, "unknown NIC AXI cause %d\n",
			sei->axi_error_cause);
		cause_str = "N/A";
		break;
	}

	snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, cause_str,
			sei->id);
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
				event_type, desc);
}
7341
/*
 * gaudi_compute_reset_late_init() - late-init hook after a compute reset.
 * @hdev: habanalabs device structure (unused).
 *
 * Gaudi supports hard reset only, so any other reset flavor is rejected.
 *
 * Return: always -EPERM.
 */
static int gaudi_compute_reset_late_init(struct hl_device *hdev)
{
	return -EPERM;
}
7347
gaudi_hbm_read_interrupts(struct hl_device * hdev,int device,struct hl_eq_hbm_ecc_data * hbm_ecc_data)7348 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
7349 struct hl_eq_hbm_ecc_data *hbm_ecc_data)
7350 {
7351 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
7352 int rc = 0;
7353
7354 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
7355 CPU_BOOT_DEV_STS0_HBM_ECC_EN) {
7356 if (!hbm_ecc_data) {
7357 dev_err(hdev->dev, "No FW ECC data");
7358 return 0;
7359 }
7360
7361 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK,
7362 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7363 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK,
7364 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7365 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK,
7366 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7367 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK,
7368 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7369 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK,
7370 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7371 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK,
7372 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7373 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK,
7374 le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
7375
7376 dev_err(hdev->dev,
7377 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7378 device, ch, wr_par, rd_par, ca_par, serr, derr);
7379 dev_err(hdev->dev,
7380 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
7381 device, ch, hbm_ecc_data->first_addr, type,
7382 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
7383 hbm_ecc_data->dec_cnt);
7384 return 0;
7385 }
7386
7387 if (hdev->asic_prop.fw_security_enabled) {
7388 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
7389 return 0;
7390 }
7391
7392 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
7393 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
7394 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
7395 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7396 if (val) {
7397 rc = -EIO;
7398 dev_err(hdev->dev,
7399 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7400 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
7401 (val >> 2) & 0x1, (val >> 3) & 0x1,
7402 (val >> 4) & 0x1);
7403
7404 val2 = RREG32(base + ch * 0x1000 + 0x060);
7405 dev_err(hdev->dev,
7406 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7407 device, ch * 2,
7408 RREG32(base + ch * 0x1000 + 0x064),
7409 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7410 (val2 & 0xFF0000) >> 16,
7411 (val2 & 0xFF000000) >> 24);
7412 }
7413
7414 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
7415 val = (val & 0xFF) | ((val >> 8) & 0xFF);
7416 if (val) {
7417 rc = -EIO;
7418 dev_err(hdev->dev,
7419 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
7420 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
7421 (val >> 2) & 0x1, (val >> 3) & 0x1,
7422 (val >> 4) & 0x1);
7423
7424 val2 = RREG32(base + ch * 0x1000 + 0x070);
7425 dev_err(hdev->dev,
7426 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n",
7427 device, ch * 2 + 1,
7428 RREG32(base + ch * 0x1000 + 0x074),
7429 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
7430 (val2 & 0xFF0000) >> 16,
7431 (val2 & 0xFF000000) >> 24);
7432 }
7433
7434 /* Clear interrupts */
7435 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
7436 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
7437 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
7438 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
7439 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
7440 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
7441 }
7442
7443 val = RREG32(base + 0x8F30);
7444 val2 = RREG32(base + 0x8F34);
7445 if (val | val2) {
7446 rc = -EIO;
7447 dev_err(hdev->dev,
7448 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
7449 device, val, val2);
7450 }
7451 val = RREG32(base + 0x8F40);
7452 val2 = RREG32(base + 0x8F44);
7453 if (val | val2) {
7454 rc = -EIO;
7455 dev_err(hdev->dev,
7456 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
7457 device, val, val2);
7458 }
7459
7460 return rc;
7461 }
7462
/*
 * gaudi_hbm_event_to_dev() - map an HBM SPI event ID to its HBM device index.
 * @hbm_event_type: GAUDI_EVENT_HBMx_SPI_y event ID.
 *
 * Return: HBM device index (0-3); 0 for an unrecognized event (should never
 * happen since callers only pass HBM SPI events).
 */
static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
{
	if (hbm_event_type == GAUDI_EVENT_HBM0_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM0_SPI_1)
		return 0;

	if (hbm_event_type == GAUDI_EVENT_HBM1_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM1_SPI_1)
		return 1;

	if (hbm_event_type == GAUDI_EVENT_HBM2_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM2_SPI_1)
		return 2;

	if (hbm_event_type == GAUDI_EVENT_HBM3_SPI_0 ||
			hbm_event_type == GAUDI_EVENT_HBM3_SPI_1)
		return 3;

	/* Should never happen */
	return 0;
}
7485
gaudi_tpc_read_interrupts(struct hl_device * hdev,u8 tpc_id,char * interrupt_name)7486 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
7487 char *interrupt_name)
7488 {
7489 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
7490 bool soft_reset_required = false;
7491
7492 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
7493 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
7494
7495 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
7496 if (tpc_interrupts_cause & BIT(i)) {
7497 dev_err_ratelimited(hdev->dev,
7498 "TPC%d_%s interrupt cause: %s\n",
7499 tpc_id, interrupt_name,
7500 gaudi_tpc_interrupts_cause[i]);
7501 /* If this is QM error, we need to soft-reset */
7502 if (i == 15)
7503 soft_reset_required = true;
7504 }
7505
7506 /* Clear interrupts */
7507 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
7508
7509 return soft_reset_required;
7510 }
7511
/*
 * tpc_dec_event_to_tpc_id() - map a TPC DEC event ID to its TPC engine index.
 * DEC event IDs are allocated two per TPC engine, starting at TPC0.
 */
static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
{
	return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) / 2;
}
7516
/*
 * tpc_krn_event_to_tpc_id() - map a TPC KRN_ERR event ID to its TPC engine
 * index. KRN_ERR event IDs are allocated six per TPC engine, starting at TPC0.
 */
static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
{
	int idx = tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR;

	return idx / 6;
}
7521
/*
 * gaudi_print_clk_change_info() - track and log clock-throttling start/end
 * events (power and thermal envelopes).
 * @hdev: habanalabs device structure.
 * @event_type: one of the GAUDI_EVENT_FIX_{POWER,THERMAL}_ENV_{S,E} events.
 * @event_mask: accumulator for HL_NOTIFIER_EVENT_* bits (thermal events only).
 */
static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
{
	ktime_t ts_zero = ktime_set(0, 0);

	/* Reason bits and timestamps are shared state - serialize updates */
	mutex_lock(&hdev->clk_throttling.lock);

	switch (event_type) {
	case GAUDI_EVENT_FIX_POWER_ENV_S:
		/* Power throttling started: record reason and open interval */
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ts_zero;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_E:
		/* Power throttling ended: clear reason and close interval */
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_S:
		/* Thermal throttling started: also notify user-space */
		hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ts_zero;
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
		break;

	case GAUDI_EVENT_FIX_THERMAL_ENV_E:
		/* Thermal throttling ended: also notify user-space */
		hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
		hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
		*event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
		break;

	default:
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
		break;
	}

	mutex_unlock(&hdev->clk_throttling.lock);
}
7571
/*
 * gaudi_handle_eqe() - main handler for FW event-queue entries.
 * @hdev: habanalabs device structure.
 * @eq_entry: event queue entry received from the FW.
 *
 * Decodes the event type from the entry header, updates event statistics,
 * dispatches the per-event handling (logging, error-register collection,
 * unmasking the IRQ in FW) and accumulates a notifier event mask which is
 * sent to user-space at the end. Events that mandate a device reset jump to
 * the reset_device label, which decides between a FW-assisted reset, a
 * driver hard reset, or no reset at all.
 */
static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_info_fw_err_info fw_err_info;
	u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0;
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
	u32 fw_fatal_err_flag = 0, flags = 0;
	u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
			>> EQ_CTL_EVENT_TYPE_SHIFT);
	bool reset_required, reset_direct = false;
	u8 cause;
	int rc;

	/* Guard the stat arrays (sized GAUDI_EVENT_SIZE) against bad entries */
	if (event_type >= GAUDI_EVENT_SIZE) {
		dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
				event_type, GAUDI_EVENT_SIZE - 1);
		return;
	}

	gaudi->events_stat[event_type]++;
	gaudi->events_stat_aggregate[event_type]++;

	switch (event_type) {
	/* Double-bit (uncorrectable) ECC errors: fatal, reset the device */
	case GAUDI_EVENT_PCIE_CORE_DERR:
	case GAUDI_EVENT_PCIE_IF_DERR:
	case GAUDI_EVENT_PCIE_PHY_DERR:
	case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
	case GAUDI_EVENT_MME0_ACC_DERR:
	case GAUDI_EVENT_MME0_SBAB_DERR:
	case GAUDI_EVENT_MME1_ACC_DERR:
	case GAUDI_EVENT_MME1_SBAB_DERR:
	case GAUDI_EVENT_MME2_ACC_DERR:
	case GAUDI_EVENT_MME2_SBAB_DERR:
	case GAUDI_EVENT_MME3_ACC_DERR:
	case GAUDI_EVENT_MME3_SBAB_DERR:
	case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
		fallthrough;
	case GAUDI_EVENT_CPU_IF_ECC_DERR:
	case GAUDI_EVENT_PSOC_MEM_DERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
	case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
	case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR:
	case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
	case GAUDI_EVENT_MMU_DERR:
	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		goto reset_device;

	/* Fatal infrastructure errors: reset without RAZWI inspection */
	case GAUDI_EVENT_GIC500:
	case GAUDI_EVENT_AXI_ECC:
	case GAUDI_EVENT_L2_RAM_ECC:
	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_0 errors are fatal - collect info and reset */
	case GAUDI_EVENT_HBM0_SPI_0:
	case GAUDI_EVENT_HBM1_SPI_0:
	case GAUDI_EVENT_HBM2_SPI_0:
	case GAUDI_EVENT_HBM3_SPI_0:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	/* HBM SPI_1 errors are recoverable - report and unmask only */
	case GAUDI_EVENT_HBM0_SPI_1:
	case GAUDI_EVENT_HBM1_SPI_1:
	case GAUDI_EVENT_HBM2_SPI_1:
	case GAUDI_EVENT_HBM3_SPI_1:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_hbm_read_interrupts(hdev,
				gaudi_hbm_event_to_dev(event_type),
				&eq_entry->hbm_ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_TPC0_DEC:
	case GAUDI_EVENT_TPC1_DEC:
	case GAUDI_EVENT_TPC2_DEC:
	case GAUDI_EVENT_TPC3_DEC:
	case GAUDI_EVENT_TPC4_DEC:
	case GAUDI_EVENT_TPC5_DEC:
	case GAUDI_EVENT_TPC6_DEC:
	case GAUDI_EVENT_TPC7_DEC:
		/* In TPC DEC event, notify on TPC assertion. While there isn't
		 * a specific event for assertion yet, the FW generates TPC DEC event.
		 * The SW upper layer will inspect an internal mapped area to indicate
		 * if the event is a TPC Assertion or a "real" TPC DEC.
		 */
		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_dec_event_to_tpc_id(event_type),
					"AXI_SLV_DEC_Error");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			/* reset_direct skips the FW-assisted reset path below */
			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	case GAUDI_EVENT_TPC0_KRN_ERR:
	case GAUDI_EVENT_TPC1_KRN_ERR:
	case GAUDI_EVENT_TPC2_KRN_ERR:
	case GAUDI_EVENT_TPC3_KRN_ERR:
	case GAUDI_EVENT_TPC4_KRN_ERR:
	case GAUDI_EVENT_TPC5_KRN_ERR:
	case GAUDI_EVENT_TPC6_KRN_ERR:
	case GAUDI_EVENT_TPC7_KRN_ERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		reset_required = gaudi_tpc_read_interrupts(hdev,
					tpc_krn_event_to_tpc_id(event_type),
					"KRN_ERR");
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (reset_required) {
			dev_err(hdev->dev, "reset required due to %s\n",
				gaudi_irq_map_table[event_type].name);

			/* reset_direct skips the FW-assisted reset path below */
			reset_direct = true;
			goto reset_device;
		} else {
			hl_fw_unmask_irq(hdev, event_type);
			event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		}
		break;

	/* Single-bit (correctable) ECC errors: report and continue */
	case GAUDI_EVENT_PCIE_CORE_SERR:
	case GAUDI_EVENT_PCIE_IF_SERR:
	case GAUDI_EVENT_PCIE_PHY_SERR:
	case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
	case GAUDI_EVENT_MME0_ACC_SERR:
	case GAUDI_EVENT_MME0_SBAB_SERR:
	case GAUDI_EVENT_MME1_ACC_SERR:
	case GAUDI_EVENT_MME1_SBAB_SERR:
	case GAUDI_EVENT_MME2_ACC_SERR:
	case GAUDI_EVENT_MME2_SBAB_SERR:
	case GAUDI_EVENT_MME3_ACC_SERR:
	case GAUDI_EVENT_MME3_SBAB_SERR:
	case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
	case GAUDI_EVENT_CPU_IF_ECC_SERR:
	case GAUDI_EVENT_PSOC_MEM_SERR:
	case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
	case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
	case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR:
	case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
		fallthrough;
	case GAUDI_EVENT_MMU_SERR:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	case GAUDI_EVENT_PCIE_DEC:
	case GAUDI_EVENT_CPU_AXI_SPLITTER:
	case GAUDI_EVENT_PSOC_AXI_DEC:
	case GAUDI_EVENT_PSOC_PRSTN_FALL:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		break;

	/* MMU faults are user errors, not device errors */
	case GAUDI_EVENT_MMU_PAGE_FAULT:
	case GAUDI_EVENT_MMU_WR_PERM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	/* QMAN errors: collect per-queue error info and request a reset */
	case GAUDI_EVENT_MME0_WBC_RSP:
	case GAUDI_EVENT_MME0_SBAB0_RSP:
	case GAUDI_EVENT_MME1_WBC_RSP:
	case GAUDI_EVENT_MME1_SBAB0_RSP:
	case GAUDI_EVENT_MME2_WBC_RSP:
	case GAUDI_EVENT_MME2_SBAB0_RSP:
	case GAUDI_EVENT_MME3_WBC_RSP:
	case GAUDI_EVENT_MME3_SBAB0_RSP:
	case GAUDI_EVENT_RAZWI_OR_ADC:
	case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
	case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
		fallthrough;
	case GAUDI_EVENT_NIC0_QM0:
	case GAUDI_EVENT_NIC0_QM1:
	case GAUDI_EVENT_NIC1_QM0:
	case GAUDI_EVENT_NIC1_QM1:
	case GAUDI_EVENT_NIC2_QM0:
	case GAUDI_EVENT_NIC2_QM1:
	case GAUDI_EVENT_NIC3_QM0:
	case GAUDI_EVENT_NIC3_QM1:
	case GAUDI_EVENT_NIC4_QM0:
	case GAUDI_EVENT_NIC4_QM1:
	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		gaudi_handle_qman_err(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
		break;

	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		goto reset_device;

	/* Bus-monitor events: informational only */
	case GAUDI_EVENT_TPC0_BMON_SPMU:
	case GAUDI_EVENT_TPC1_BMON_SPMU:
	case GAUDI_EVENT_TPC2_BMON_SPMU:
	case GAUDI_EVENT_TPC3_BMON_SPMU:
	case GAUDI_EVENT_TPC4_BMON_SPMU:
	case GAUDI_EVENT_TPC5_BMON_SPMU:
	case GAUDI_EVENT_TPC6_BMON_SPMU:
	case GAUDI_EVENT_TPC7_BMON_SPMU:
	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4:
		gaudi_print_nic_axi_irq_info(hdev, event_type, &data);
		hl_fw_unmask_irq(hdev, event_type);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_sm_sei_info(hdev, event_type,
					&eq_entry->sm_sei_data);
		rc = hl_state_dump(hdev);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		if (rc)
			dev_err(hdev->dev,
				"Error during system state dump %d\n", rc);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	/* NIC status events: intentionally ignored */
	case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1:
		break;

	case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
		gaudi_print_clk_change_info(hdev, event_type, &event_mask);
		hl_fw_unmask_irq(hdev, event_type);
		break;

	case GAUDI_EVENT_PSOC_GPIO_U16_0:
		/* Low byte of the payload carries the high-temperature cause */
		cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
		dev_err(hdev->dev,
			"Received high temp H/W interrupt %d (cause %d)\n",
			event_type, cause);
		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
		break;

	case GAUDI_EVENT_DEV_RESET_REQ:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
		goto reset_device;

	case GAUDI_EVENT_FW_ALIVE_S:
		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
		fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR;
		fw_err_info.event_id = event_type;
		fw_err_info.event_mask = &event_mask;
		hl_handle_fw_err(hdev, &fw_err_info);
		goto reset_device;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
		break;
	}

	if (event_mask)
		hl_notifier_event_send_all(hdev, event_mask);

	return;

reset_device:
	reset_required = true;

	if (hdev->asic_prop.fw_security_enabled && !reset_direct) {
		/* FW-secured device: bypass the request to FW and hard-reset */
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag;

		/* notify on device unavailable while the reset triggered by fw */
		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
	} else if (hdev->hard_reset_on_fw_events) {
		flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag;
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
	} else {
		reset_required = false;
	}

	if (reset_required) {
		/* escalate general hw errors to critical/fatal error */
		if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
			hl_handle_critical_hw_err(hdev, event_type, &event_mask);

		hl_device_cond_reset(hdev, flags, event_mask);
	} else {
		hl_fw_unmask_irq(hdev, event_type);
		/* Notification on occurred event needs to be sent although reset is not executed */
		if (event_mask)
			hl_notifier_event_send_all(hdev, event_mask);
	}
}
7900
/*
 * gaudi_get_events_stat() - return a pointer to the event statistics array.
 * @hdev: habanalabs device structure.
 * @aggregate: select the aggregated (never-cleared) counters.
 * @size: out parameter, set to the size in bytes of the returned array.
 *
 * Return: pointer to the selected per-event counters array.
 */
static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	void *stat;

	if (aggregate) {
		stat = gaudi->events_stat_aggregate;
		*size = (u32) sizeof(gaudi->events_stat_aggregate);
	} else {
		stat = gaudi->events_stat;
		*size = (u32) sizeof(gaudi->events_stat);
	}

	return stat;
}
7913
/*
 * gaudi_mmu_invalidate_cache() - invalidate the whole MMU STLB cache.
 * @hdev: habanalabs device structure.
 * @is_hard: unused on Gaudi (full invalidation is always performed).
 * @flags: unused on Gaudi.
 *
 * Skipped (returns 0) when the MMU was never initialized or a hard reset
 * is already pending. Triggers an L0 & L1 cache invalidation via the STLB
 * registers and polls until the HW reports completion.
 *
 * Return: 0 on success, error code if the poll timed out.
 */
static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	u32 status, timeout_usec;
	int rc;

	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
		hdev->reset_info.hard_reset_pending)
		return 0;

	/* Palladium emulation is much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	/* L0 & L1 invalidation */
	WREG32(mmSTLB_INV_PS, 3);
	WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
	WREG32(mmSTLB_INV_PS, 2);

	/* Wait for STLB_INV_PS to drop to zero, i.e. invalidation done */
	rc = hl_poll_timeout(
		hdev,
		mmSTLB_INV_PS,
		status,
		!status,
		1000,
		timeout_usec);

	WREG32(mmSTLB_INV_SET, 0);

	return rc;
}
7946
/*
 * gaudi_mmu_invalidate_cache_range() - ranged MMU cache invalidation hook.
 * @hdev: habanalabs device structure.
 * @is_hard: forwarded to the full-invalidation implementation.
 * @flags: forwarded to the full-invalidation implementation.
 * @asid: unused - Gaudi cannot invalidate per-ASID.
 * @va: unused - Gaudi cannot invalidate per-range.
 * @size: unused - Gaudi cannot invalidate per-range.
 *
 * Return: result of the full cache invalidation.
 */
static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
						bool is_hard, u32 flags,
						u32 asid, u64 va, u64 size)
{
	/* Treat as invalidate all because there is no range invalidation
	 * in Gaudi
	 */
	return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags);
}
7956
/*
 * gaudi_mmu_update_asid_hop0_addr() - program the hop-0 page-table address
 * for a given ASID.
 * @hdev: habanalabs device structure.
 * @asid: address-space ID to configure.
 * @phys_addr: physical address of the hop-0 page table.
 *
 * Writes the ASID and the split hop-0 address (bits 43:12 and 49:44) to the
 * MMU registers, kicks the update via MMU_BUSY and polls until the HW clears
 * the busy bit.
 *
 * Return: 0 on success, error code if the busy-bit poll timed out.
 */
static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr)
{
	u32 status, timeout_usec;
	int rc;

	/* Palladium emulation is much slower - use an extended timeout */
	if (hdev->pldm)
		timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
	else
		timeout_usec = MMU_CONFIG_TIMEOUT_USEC;

	WREG32(MMU_ASID, asid);
	WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
	WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
	/* Bit 31 kicks the update; HW clears it when the config is applied */
	WREG32(MMU_BUSY, 0x80000000);

	rc = hl_poll_timeout(
		hdev,
		MMU_BUSY,
		status,
		!(status & 0x80000000),
		1000,
		timeout_usec);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout during MMU hop0 config of asid %d\n", asid);
		return rc;
	}

	return 0;
}
7988
gaudi_send_heartbeat(struct hl_device * hdev)7989 static int gaudi_send_heartbeat(struct hl_device *hdev)
7990 {
7991 struct gaudi_device *gaudi = hdev->asic_specific;
7992
7993 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
7994 return 0;
7995
7996 return hl_fw_send_heartbeat(hdev);
7997 }
7998
gaudi_cpucp_info_get(struct hl_device * hdev)7999 static int gaudi_cpucp_info_get(struct hl_device *hdev)
8000 {
8001 struct gaudi_device *gaudi = hdev->asic_specific;
8002 struct asic_fixed_properties *prop = &hdev->asic_prop;
8003 int rc;
8004
8005 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8006 return 0;
8007
8008 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0,
8009 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
8010 mmCPU_BOOT_ERR1);
8011 if (rc)
8012 return rc;
8013
8014 if (!strlen(prop->cpucp_info.card_name))
8015 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
8016 CARD_NAME_MAX_LEN);
8017
8018 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
8019
8020 set_default_power_values(hdev);
8021
8022 return 0;
8023 }
8024
/*
 * gaudi_is_device_idle() - check whether all compute/DMA/NIC engines are idle.
 * @hdev: habanalabs device structure.
 * @mask_arr: optional bitmap; a bit is set per busy engine.
 * @mask_len: length of @mask_arr (unused directly; bitmap access via set_bit).
 * @e: optional engines_data buffer for a human-readable idle report.
 *
 * Scans the DMA, TPC, MME and NIC queue-manager/engine status registers.
 * MME engines 1 and 3 are slaves of 0 and 2 respectively, so their QMANs
 * are not checked. NIC engines are only checked if their HW capability bit
 * was initialized.
 *
 * Return: true if every scanned engine is idle, false otherwise.
 */
static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
		struct engines_data *e)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
	const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
	const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
	unsigned long *mask = (unsigned long *)mask_arr;
	u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
	bool is_idle = true, is_eng_idle, is_slave;
	u64 offset;
	int i, dma_id, port;

	if (e)
		hl_engine_data_sprintf(e,
			"\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
			"--- ------- ------------ ---------- -------------\n");

	/* DMA engines: idle means both QMAN and DMA core report idle */
	for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
		dma_id = gaudi_dma_assignment[i];
		offset = dma_id * DMA_QMAN_OFFSET;

		qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
		dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_DMA_IDLE(dma_core_sts0);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, dma_id,
				is_eng_idle ? "Y" : "N", qm_glbl_sts0,
				qm_cgm_sts, dma_core_sts0);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
			"--- ------- ------------ ---------- ----------\n");

	/* TPC engines: idle means both QMAN and TPC config status are idle */
	for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
		offset = i * TPC_QMAN_OFFSET;
		qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
		qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
		tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
		is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
				IS_TPC_IDLE(tpc_cfg_sts);
		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
		if (e)
			hl_engine_data_sprintf(e, fmt, i,
				is_eng_idle ? "Y" : "N",
				qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
			"--- ------- ------------ ---------- -----------\n");

	for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
		offset = i * MME_QMAN_OFFSET;
		mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
		is_eng_idle = IS_MME_IDLE(mme_arch_sts);

		/* MME 1 & 3 are slaves, no need to check their QMANs */
		is_slave = i % 2;
		if (!is_slave) {
			qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
			is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
		}

		is_idle &= is_eng_idle;

		if (mask && !is_eng_idle)
			set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
		if (e) {
			if (!is_slave)
				hl_engine_data_sprintf(e, fmt, i,
					is_eng_idle ? "Y" : "N",
					qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
			else
				/* Slaves have no QMAN - print placeholders */
				hl_engine_data_sprintf(e, mme_slave_fmt, i,
					is_eng_idle ? "Y" : "N", "-",
					"-", mme_arch_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e,
			"\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
			"--- ------- ------------ ----------\n");

	/* NIC macros host two ports each; check each enabled port's QMAN */
	for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) {
		offset = i * NIC_MACRO_QMAN_OFFSET;
		port = 2 * i;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}

		port = 2 * i + 1;
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) {
			qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset);
			qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset);
			is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
			is_idle &= is_eng_idle;

			if (mask && !is_eng_idle)
				set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
			if (e)
				hl_engine_data_sprintf(e, nic_fmt, port,
						is_eng_idle ? "Y" : "N",
						qm_glbl_sts0, qm_cgm_sts);
		}
	}

	if (e)
		hl_engine_data_sprintf(e, "\n");

	return is_idle;
}
8161
/* Acquire the HW queues spinlock (sparse-annotated for lock checking) */
static void gaudi_hw_queues_lock(struct hl_device *hdev)
	__acquires(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_lock(&gaudi->hw_queues_lock);
}
8169
/* Release the HW queues spinlock (sparse-annotated for lock checking) */
static void gaudi_hw_queues_unlock(struct hl_device *hdev)
	__releases(&gaudi->hw_queues_lock)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	spin_unlock(&gaudi->hw_queues_lock);
}
8177
gaudi_get_pci_id(struct hl_device * hdev)8178 static u32 gaudi_get_pci_id(struct hl_device *hdev)
8179 {
8180 return hdev->pdev->device;
8181 }
8182
/*
 * Fetch the board EEPROM contents through the firmware.
 * The request travels over the CPU queue, so if that queue was never
 * brought up we simply report success with no data.
 */
static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
				size_t max_size)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
		return hl_fw_get_eeprom_data(hdev, data, max_size);

	return 0;
}
8193
gaudi_get_monitor_dump(struct hl_device * hdev,void * data)8194 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data)
8195 {
8196 struct gaudi_device *gaudi = hdev->asic_specific;
8197
8198 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
8199 return 0;
8200
8201 return hl_fw_get_monitor_dump(hdev, data);
8202 }
8203
8204 /*
8205 * this function should be used only during initialization and/or after reset,
8206 * when there are no active users.
8207 */
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id)
{
	u64 kernel_timeout;
	u32 status, offset;
	int rc;

	/* Per-TPC register stride, derived from two consecutive TPC blocks */
	offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);

	/* Palladium emulation runs far slower than silicon */
	if (hdev->pldm)
		kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
	else
		kernel_timeout = HL_DEVICE_TIMEOUT_USEC;

	/* Point both the QMAN kernel base and the icache base at the kernel */
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
			upper_32_bits(tpc_kernel));
	/* set a valid LUT pointer, content is of no significance */
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
			lower_32_bits(tpc_kernel));
	WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
			upper_32_bits(tpc_kernel));

	WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
			lower_32_bits(CFG_BASE +
				mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));

	/* Invalidate the icache and prefetch the kernel into it */
	WREG32(mmTPC0_CFG_TPC_CMD + offset,
			(1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
			1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d icache prefetch\n",
			tpc_id);
		return -EIO;
	}

	/* Kick off actual kernel execution */
	WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
		1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);

	/* wait a bit for the engine to start executing */
	usleep_range(1000, 1500);

	/* wait until engine has finished executing */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_STATUS + offset,
		status,
		(status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
				TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		return -EIO;
	}

	/* Finally, wait for the work-queue inflight counter to drain to zero */
	rc = hl_poll_timeout(
		hdev,
		mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
		status,
		(status == 0),
		1000,
		kernel_timeout);

	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d kernel to execute\n",
			tpc_id);
		return -EIO;
	}

	return 0;
}
8303
/*
 * Allocate and MMU-map the per-context pool of internal command buffers
 * used for collective wait operations. Returns 0 on success or a negative
 * errno; on failure everything acquired so far is released in reverse order.
 */
static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
			struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int min_alloc_order, rc, collective_cb_size;

	/* The pool is fetched through the MMU; nothing to do without it */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
			HOST_SPACE_INTERNAL_CB_SZ,
			&hdev->internal_cb_pool_dma_addr,
			GFP_KERNEL | __GFP_ZERO);

	if (!hdev->internal_cb_pool_virt_addr)
		return -ENOMEM;

	/* Smallest allocation must fit one collective CB:
	 * five MSG_SHORT packets plus one FENCE packet
	 */
	collective_cb_size = sizeof(struct packet_msg_short) * 5 +
			sizeof(struct packet_fence);
	min_alloc_order = ilog2(collective_cb_size);

	hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
	if (!hdev->internal_cb_pool) {
		dev_err(hdev->dev,
			"Failed to create internal CB pool\n");
		rc = -ENOMEM;
		goto free_internal_cb_pool;
	}

	rc = gen_pool_add(hdev->internal_cb_pool,
				(uintptr_t) hdev->internal_cb_pool_virt_addr,
				HOST_SPACE_INTERNAL_CB_SZ, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to internal CB pool\n");
		rc = -EFAULT;
		goto destroy_internal_cb_pool;
	}

	/* Reserve a device VA block and map the DMA buffer into it */
	hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx,
			HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ,
			HL_MMU_VA_ALIGNMENT_NOT_NEEDED);

	if (!hdev->internal_cb_va_base) {
		rc = -ENOMEM;
		goto destroy_internal_cb_pool;
	}

	mutex_lock(&hdev->mmu_lock);

	rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
			hdev->internal_cb_pool_dma_addr,
			HOST_SPACE_INTERNAL_CB_SZ);
	if (rc)
		goto unreserve_internal_cb_pool;

	rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
	if (rc)
		goto unmap_internal_cb_pool;

	mutex_unlock(&hdev->mmu_lock);

	return 0;

unmap_internal_cb_pool:
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
unreserve_internal_cb_pool:
	mutex_unlock(&hdev->mmu_lock);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
destroy_internal_cb_pool:
	gen_pool_destroy(hdev->internal_cb_pool);
free_internal_cb_pool:
	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);

	return rc;
}
8383
/*
 * Tear down the per-context internal CB pool: unmap from the MMU, release
 * the VA block, destroy the gen_pool and free the backing DMA buffer.
 * Exact reverse order of gaudi_internal_cb_pool_init().
 */
static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
			struct hl_ctx *ctx)
{
	struct gaudi_device *gaudi = hdev->asic_specific;

	/* Pool only exists when the MMU was brought up */
	if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
		return;

	mutex_lock(&hdev->mmu_lock);
	hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
			HOST_SPACE_INTERNAL_CB_SZ);
	hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
	mutex_unlock(&hdev->mmu_lock);

	gen_pool_destroy(hdev->internal_cb_pool);

	hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
					hdev->internal_cb_pool_dma_addr);
}
8405
gaudi_ctx_init(struct hl_ctx * ctx)8406 static int gaudi_ctx_init(struct hl_ctx *ctx)
8407 {
8408 int rc;
8409
8410 if (ctx->asid == HL_KERNEL_ASID_ID)
8411 return 0;
8412
8413 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
8414 if (rc)
8415 return rc;
8416
8417 rc = gaudi_restore_user_registers(ctx->hdev);
8418 if (rc)
8419 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8420
8421 return rc;
8422 }
8423
gaudi_ctx_fini(struct hl_ctx * ctx)8424 static void gaudi_ctx_fini(struct hl_ctx *ctx)
8425 {
8426 if (ctx->asid == HL_KERNEL_ASID_ID)
8427 return;
8428
8429 gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
8430 }
8431
/* No pre-schedule work is needed on Gaudi; hook exists for ASICs that need it */
static int gaudi_pre_schedule_cs(struct hl_cs *cs)
{
	return 0;
}
8436
/* Translate a completion-queue index to its assigned HW queue ID */
static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
{
	return gaudi_cq_assignment[cq_idx];
}
8441
gaudi_get_signal_cb_size(struct hl_device * hdev)8442 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
8443 {
8444 return sizeof(struct packet_msg_short) +
8445 sizeof(struct packet_msg_prot) * 2;
8446 }
8447
gaudi_get_wait_cb_size(struct hl_device * hdev)8448 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
8449 {
8450 return sizeof(struct packet_msg_short) * 4 +
8451 sizeof(struct packet_fence) +
8452 sizeof(struct packet_msg_prot) * 2;
8453 }
8454
/* Register offset of a sync object; each SOB register is 4 bytes wide */
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
{
	return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
}
8459
/*
 * Append a MSG_SHORT packet that increments the given sync object by one
 * to the CB at offset 'size'. Returns the new used size of the CB.
 */
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb)
{
	struct hl_cb *cb = (struct hl_cb *) data;
	struct packet_msg_short *pkt;
	u32 value, ctl, pkt_size = sizeof(*pkt);

	/* Write the packet at the current end of the CB */
	pkt = cb->kernel_address + size;
	memset(pkt, 0, pkt_size);

	/* Inc by 1, Mode ADD */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1);

	/* SOB registers are 4 bytes apart */
	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return size + pkt_size;
}
8487
/*
 * Fill a MSG_SHORT packet that writes 'value' to a monitor register at
 * 'addr' (offset relative to the W_S monitor base). Returns the packet size.
 */
static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
				u16 addr)
{
	u32 ctl, pkt_size = sizeof(*pkt);

	memset(pkt, 0, pkt_size);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8507
/*
 * Fill a MSG_SHORT packet that arms monitor 'mon_id' to watch the SOB group
 * of 'sob_base' with the given mask until it reaches 'sob_val' (GEQ compare).
 * Returns the packet size, or 0 if the sob_base/mask combination is invalid.
 */
static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
		struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask,
		u16 sob_val, u16 mon_id)
{
	u64 monitor_base;
	u32 ctl, value, pkt_size = sizeof(*pkt);
	u16 msg_addr_offset;
	u8 mask;

	if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
		dev_err(hdev->dev,
			"sob_base %u (mask %#x) is not valid\n",
			sob_base, sob_mask);
		return 0;
	}

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
				monitor_base;

	memset(pkt, 0, pkt_size);

	/* Monitor config packet: bind the monitor to a sync object */
	value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK,
			0); /* GREATER OR EQUAL*/
	value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask);

	ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
	ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	pkt->value = cpu_to_le32(value);
	pkt->ctl = cpu_to_le32(ctl);

	return pkt_size;
}
8556
gaudi_add_fence_pkt(struct packet_fence * pkt)8557 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
8558 {
8559 u32 ctl, cfg, pkt_size = sizeof(*pkt);
8560
8561 memset(pkt, 0, pkt_size);
8562
8563 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
8564 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
8565 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
8566
8567 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
8568 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
8569 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
8570 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
8571
8572 pkt->cfg = cpu_to_le32(cfg);
8573 pkt->ctl = cpu_to_le32(ctl);
8574
8575 return pkt_size;
8576 }
8577
/*
 * Translate a queue ID into the absolute address of its CP FENCE2 RDATA
 * register. Only queues usable for collective wait are handled; any other
 * queue ID returns -EINVAL.
 */
static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr)
{
	u32 offset, nic_index;

	switch (queue_id) {
	case GAUDI_QUEUE_ID_DMA_0_0:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_0_1:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_0_2:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_0_3:
		offset = mmDMA0_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_1_0:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_1_1:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_1_2:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_1_3:
		offset = mmDMA1_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_DMA_5_0:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_DMA_5_1:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_DMA_5_2:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_DMA_5_3:
		offset = mmDMA5_QM_CP_FENCE2_RDATA_3;
		break;
	case GAUDI_QUEUE_ID_TPC_7_0:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_0;
		break;
	case GAUDI_QUEUE_ID_TPC_7_1:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_1;
		break;
	case GAUDI_QUEUE_ID_TPC_7_2:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_2;
		break;
	case GAUDI_QUEUE_ID_TPC_7_3:
		offset = mmTPC7_QM_CP_FENCE2_RDATA_3;
		break;
	/* NIC queues: two engines share one macro block, so decode the NIC
	 * index into a macro offset (nic_index / 2) plus an engine offset
	 * (nic_index % 2). Stream number selects the RDATA_x register.
	 */
	case GAUDI_QUEUE_ID_NIC_0_0:
	case GAUDI_QUEUE_ID_NIC_1_0:
	case GAUDI_QUEUE_ID_NIC_2_0:
	case GAUDI_QUEUE_ID_NIC_3_0:
	case GAUDI_QUEUE_ID_NIC_4_0:
	case GAUDI_QUEUE_ID_NIC_5_0:
	case GAUDI_QUEUE_ID_NIC_6_0:
	case GAUDI_QUEUE_ID_NIC_7_0:
	case GAUDI_QUEUE_ID_NIC_8_0:
	case GAUDI_QUEUE_ID_NIC_9_0:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_1:
	case GAUDI_QUEUE_ID_NIC_1_1:
	case GAUDI_QUEUE_ID_NIC_2_1:
	case GAUDI_QUEUE_ID_NIC_3_1:
	case GAUDI_QUEUE_ID_NIC_4_1:
	case GAUDI_QUEUE_ID_NIC_5_1:
	case GAUDI_QUEUE_ID_NIC_6_1:
	case GAUDI_QUEUE_ID_NIC_7_1:
	case GAUDI_QUEUE_ID_NIC_8_1:
	case GAUDI_QUEUE_ID_NIC_9_1:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_2:
	case GAUDI_QUEUE_ID_NIC_1_2:
	case GAUDI_QUEUE_ID_NIC_2_2:
	case GAUDI_QUEUE_ID_NIC_3_2:
	case GAUDI_QUEUE_ID_NIC_4_2:
	case GAUDI_QUEUE_ID_NIC_5_2:
	case GAUDI_QUEUE_ID_NIC_6_2:
	case GAUDI_QUEUE_ID_NIC_7_2:
	case GAUDI_QUEUE_ID_NIC_8_2:
	case GAUDI_QUEUE_ID_NIC_9_2:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_2) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_2 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	case GAUDI_QUEUE_ID_NIC_0_3:
	case GAUDI_QUEUE_ID_NIC_1_3:
	case GAUDI_QUEUE_ID_NIC_2_3:
	case GAUDI_QUEUE_ID_NIC_3_3:
	case GAUDI_QUEUE_ID_NIC_4_3:
	case GAUDI_QUEUE_ID_NIC_5_3:
	case GAUDI_QUEUE_ID_NIC_6_3:
	case GAUDI_QUEUE_ID_NIC_7_3:
	case GAUDI_QUEUE_ID_NIC_8_3:
	case GAUDI_QUEUE_ID_NIC_9_3:
		nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_3) >> 2;
		offset = mmNIC0_QM0_CP_FENCE2_RDATA_3 +
				(nic_index >> 1) * NIC_MACRO_QMAN_OFFSET +
				(nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET;
		break;
	default:
		return -EINVAL;
	}

	*addr = CFG_BASE + offset;

	return 0;
}
8699
/*
 * Emit the three monitor-configuration MSG_SHORT packets into 'buf':
 * payload address low, payload address high, and payload data (1).
 * Returns the total number of bytes written.
 */
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when the
	 * sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
8740
/*
 * Build a complete wait CB: three monitor-setup packets, one arm-monitor
 * packet and a fence packet, appended at prop->size. Returns the new used
 * size of the CB, or 0 if the queue has no fence address.
 */
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
				prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}
8762
/* Zero a sync object's HW register and reset its kref for reuse */
static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
		hw_sob->sob_id);

	/* SOB registers are 4 bytes apart */
	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}
8775
gaudi_get_device_time(struct hl_device * hdev)8776 static u64 gaudi_get_device_time(struct hl_device *hdev)
8777 {
8778 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
8779
8780 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
8781 }
8782
/* HW block mapping to user space is not supported on Gaudi */
static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}
8788
/* HW block mmap is not supported on Gaudi */
static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}
8795
/*
 * Tell the firmware the driver is ready to receive events, by writing the
 * INTS_REGISTER event's cpu_id to the host interrupt register.
 */
static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	/* Legacy FW uses a fixed GIC register; newer FW publishes the
	 * interrupt register dynamically via the comms descriptor
	 */
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}
8807
/* Acking MMU page fault / access error capture is not supported on Gaudi */
static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}
8812
/*
 * Map a uapi PLL index (HL_GAUDI_*) to the firmware's PLL enumeration.
 * Returns -EINVAL for an unknown index.
 */
static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL:
		return CPU_PLL;
	case HL_GAUDI_PCI_PLL:
		return PCI_PLL;
	case HL_GAUDI_NIC_PLL:
		return NIC_PLL;
	case HL_GAUDI_DMA_PLL:
		return DMA_PLL;
	case HL_GAUDI_MESH_PLL:
		return MESH_PLL;
	case HL_GAUDI_MME_PLL:
		return MME_PLL;
	case HL_GAUDI_TPC_PLL:
		return TPC_PLL;
	case HL_GAUDI_IF_PLL:
		return IF_PLL;
	case HL_GAUDI_SRAM_PLL:
		return SRAM_PLL;
	case HL_GAUDI_HBM_PLL:
		return HBM_PLL;
	default:
		return -EINVAL;
	}
}
8829
/*
 * Add one sync-object -> engine mapping to the hash map. A reg value of 0
 * or 0xffffffff means "no sync object" and is silently skipped.
 * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
 */
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry; kzalloc (not kzalloc_obj, which is not a
	 * kernel API) with explicit GFP_KERNEL as this runs in process context
	 */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
8855
/*
 * Build the sync-object -> engine hash map by reading the configured SO
 * register of every TPC, MME (including sub-engines) and DMA engine.
 * On any failure the partially-built map is freed and the error returned.
 */
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}
8908
/* Non-zero when the monitor's status register has its VALID bit set */
static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}
8915
/*
 * Format into 'sobs' a comma-separated list of the sync object IDs the
 * armed monitor is watching. Output is truncated once fewer than
 * 'max_write' bytes remain in the buffer.
 */
static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	/* Upper bound on bytes a single snprintf below may write */
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		/* A cleared mask bit means that SOB is monitored */
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
8943
/*
 * Append a human-readable description of one armed monitor (watched SOB
 * group/mask/target value, payload write, pending status) to the resizable
 * state-dump buffer. Returns hl_snprintf_resize()'s result.
 */
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
8981
8982
/*
 * Dump the state of all in-progress fences of a single engine into the
 * resizable state-dump buffer. Reads one CP status register per queue and
 * one fence counter per (fence, queue) pair, then prints every queue whose
 * CP currently has a fence in progress. Returns 0 on success or a negative
 * errno (allocation or buffer-resize failure).
 */
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	/* One status word per queue */
	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	/* One counter per (fence, queue) pair */
	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	/* statuses[] is sized by the number of queues, so it must be filled
	 * with the queue count as bound - iterating by the fence count (as a
	 * previous revision did) overruns the array whenever
	 * SP_ENGINE_NUM_OF_FENCES > SP_ENGINE_NUM_OF_QUEUES
	 */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}
9051
9052
/* State-dump callbacks for Gaudi, installed by gaudi_state_dump_init() */
static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
9059
/*
 * Initialize the per-device state-dump machinery: populate the SOB-name and
 * monitor-name hash tables and install the Gaudi properties, sync manager
 * names and callback table.
 */
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
9081
/* Return the static array of stream-master queue IDs */
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}
9086
/* No dynamic DRAM properties on Gaudi; hook exists for newer ASICs */
static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}
9091
/* No engine binning on Gaudi; hook exists for newer ASICs */
static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}
9096
/* RAZWI polling is not implemented for Gaudi; intentional no-op */
static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}
9100
/* sysfs 'infineon_ver' show: print the VRM controller version from cpucp */
static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	/* NOTE(review): new sysfs code should prefer sysfs_emit() over
	 * sprintf(); left as-is to avoid a behavior change here
	 */
	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}
9110
/* Read-only sysfs attribute backed by infineon_ver_show() */
static DEVICE_ATTR_RO(infineon_ver);

/* VRM attribute group contents, installed by gaudi_add_device_attr() */
static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};
9117
/* Register the common clock sysfs attributes and the Gaudi VRM attributes */
static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}
9124
/* Device-activity notification to FW is not needed on Gaudi; no-op */
static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
9129
/*
 * Gaudi implementation of the common habanalabs ASIC operations table.
 * Hooks the driver core calls unconditionally but that Gaudi does not
 * implement are set to NULL or to no-op stubs defined above.
 */
static const struct hl_asic_funcs gaudi_funcs = {
	/* Device bring-up / teardown */
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	/* Queue and DMA handling */
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	/* Events and MMU */
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	/* Firmware loading */
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	/* Signal/wait (sync stream) support */
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};
9226
/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 * Installs the Gaudi implementation of the common ASIC operations table
 * (gaudi_funcs) on the device, so the common driver code dispatches all
 * ASIC-specific work to the functions in this file.
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}
9237