1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2020-2022 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "gaudi2P.h"
9 #include "gaudi2_masks.h"
10 #include "../include/gaudi2/gaudi2_special_blocks.h"
11 #include "../include/hw_ip/mmu/mmu_general.h"
12 #include "../include/hw_ip/mmu/mmu_v2_0.h"
13 #include "../include/gaudi2/gaudi2_packets.h"
14 #include "../include/gaudi2/gaudi2_reg_map.h"
15 #include "../include/gaudi2/gaudi2_async_ids_map_extended.h"
16 #include "../include/gaudi2/arc/gaudi2_arc_common_packets.h"
17
18 #include <linux/module.h>
19 #include <linux/pci.h>
20 #include <linux/hwmon.h>
21 #include <linux/iommu.h>
22
23 #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */
24
25 #define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
26
27 #define GAUDI2_RESET_POLL_TIMEOUT_USEC 500000 /* 500ms */
28 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */
29 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */
30 #define GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC 3000000 /* 3s */
31 #define GAUDI2_RESET_POLL_CNT 3
32 #define GAUDI2_RESET_WAIT_MSEC 1 /* 1ms */
33 #define GAUDI2_CPU_RESET_WAIT_MSEC 100 /* 100ms */
34 #define GAUDI2_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
35 #define GAUDI2_CB_POOL_CB_CNT 512
36 #define GAUDI2_CB_POOL_CB_SIZE SZ_128K /* 128KB */
37 #define GAUDI2_MSG_TO_CPU_TIMEOUT_USEC 4000000 /* 4s */
38 #define GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC 25000000 /* 25s */
39 #define GAUDI2_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
40 #define GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
41
42 #define GAUDI2_ALLOC_CPU_MEM_RETRY_CNT 3
43
44 /*
45 * Since the code already has built-in support for binning of up to MAX_FAULTY_TPCS TPCs,
46 * and relies on that value (for array sizes etc.), we define a separate value for the
47 * maximum number of faulty TPCs which reflects the cluster binning requirements.
48 */
49 #define MAX_CLUSTER_BINNING_FAULTY_TPCS 1
50 #define MAX_FAULTY_XBARS 1
51 #define MAX_FAULTY_EDMAS 1
52 #define MAX_FAULTY_DECODERS 1
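
/*
 * Illustrative sketch only (not part of the driver flow): a hypothetical check of
 * per-cluster binning counts against the limits defined above. The helper name and
 * the faulty-count parameters are assumptions made for this example.
 */
static inline bool gaudi2_example_binning_within_limits(u32 faulty_tpcs_in_cluster,
							u32 faulty_xbars,
							u32 faulty_edmas,
							u32 faulty_decoders)
{
	/* each faulty-unit count must not exceed its cluster binning limit */
	return faulty_tpcs_in_cluster <= MAX_CLUSTER_BINNING_FAULTY_TPCS &&
		faulty_xbars <= MAX_FAULTY_XBARS &&
		faulty_edmas <= MAX_FAULTY_EDMAS &&
		faulty_decoders <= MAX_FAULTY_DECODERS;
}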
53
54 #define GAUDI2_TPC_FULL_MASK 0x1FFFFFF
55 #define GAUDI2_HIF_HMMU_FULL_MASK 0xFFFF
56 #define GAUDI2_DECODER_FULL_MASK 0x3FF
57
58 #define GAUDI2_NA_EVENT_CAUSE 0xFF
59 #define GAUDI2_NUM_OF_QM_ERR_CAUSE 18
60 #define GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE 25
61 #define GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE 3
62 #define GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE 14
63 #define GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE 3
64 #define GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE 2
65 #define GAUDI2_NUM_OF_ROT_ERR_CAUSE 22
66 #define GAUDI2_NUM_OF_TPC_INTR_CAUSE 31
67 #define GAUDI2_NUM_OF_DEC_ERR_CAUSE 25
68 #define GAUDI2_NUM_OF_MME_ERR_CAUSE 16
69 #define GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE 7
70 #define GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE 8
71 #define GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE 19
72 #define GAUDI2_NUM_OF_HBM_SEI_CAUSE 9
73 #define GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE 3
74 #define GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE 3
75 #define GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE 2
76 #define GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE 2
77 #define GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE 2
78 #define GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE 5
79
80 #define GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 10)
81 #define GAUDI2_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 200)
82 #define GAUDI2_ARB_WDT_TIMEOUT (0x1000000)
83
84 #define GAUDI2_VDEC_TIMEOUT_USEC 10000 /* 10ms */
85 #define GAUDI2_PLDM_VDEC_TIMEOUT_USEC (GAUDI2_VDEC_TIMEOUT_USEC * 100)
86
87 #define KDMA_TIMEOUT_USEC USEC_PER_SEC
88
89 #define IS_DMA_IDLE(dma_core_sts0) \
90 (!((dma_core_sts0) & (DCORE0_EDMA0_CORE_STS0_BUSY_MASK)))
91
92 #define IS_DMA_HALTED(dma_core_sts1) \
93 ((dma_core_sts1) & (DCORE0_EDMA0_CORE_STS1_IS_HALT_MASK))
94
95 #define IS_MME_IDLE(mme_arch_sts) (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK)
96
97 #define IS_TPC_IDLE(tpc_cfg_sts) (((tpc_cfg_sts) & (TPC_IDLE_MASK)) == (TPC_IDLE_MASK))
98
99 #define IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) \
100 ((((qm_glbl_sts0) & (QM_IDLE_MASK)) == (QM_IDLE_MASK)) && \
101 (((qm_glbl_sts1) & (QM_ARC_IDLE_MASK)) == (QM_ARC_IDLE_MASK)) && \
102 (((qm_cgm_sts) & (CGM_IDLE_MASK)) == (CGM_IDLE_MASK)))
103
104 #define PCIE_DEC_EN_MASK 0x300
105 #define DEC_WORK_STATE_IDLE 0
106 #define DEC_WORK_STATE_PEND 3
107 #define IS_DEC_IDLE(dec_swreg15) \
108 (((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_IDLE || \
109 ((dec_swreg15) & DCORE0_DEC0_CMD_SWREG15_SW_WORK_STATE_MASK) == DEC_WORK_STATE_PEND)
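
/*
 * Usage sketch (illustration only): how the idle-check macros above are meant to be
 * combined once the relevant status registers of an engine have been sampled (e.g.
 * with RREG32()). The helper name and its parameters are assumptions for this
 * example, not the driver's actual is-idle routine.
 */
static inline bool gaudi2_example_edma_engine_idle(u32 qm_glbl_sts0, u32 qm_glbl_sts1,
						u32 qm_cgm_sts, u32 dma_core_sts0)
{
	/* an EDMA engine is idle only if both its QMAN and its DMA core are idle */
	return IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
		IS_DMA_IDLE(dma_core_sts0);
}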
110
111 /* HBM MMU address scrambling parameters */
112 #define GAUDI2_HBM_MMU_SCRM_MEM_SIZE SZ_8M
113 #define GAUDI2_HBM_MMU_SCRM_DIV_SHIFT 26
114 #define GAUDI2_HBM_MMU_SCRM_MOD_SHIFT 0
115 #define GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK DRAM_VA_HINT_MASK
116 #define GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR 16
117 #define MMU_RANGE_INV_VA_LSB_SHIFT 12
118 #define MMU_RANGE_INV_VA_MSB_SHIFT 44
119 #define MMU_RANGE_INV_EN_SHIFT 0
120 #define MMU_RANGE_INV_ASID_EN_SHIFT 1
121 #define MMU_RANGE_INV_ASID_SHIFT 2
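
/*
 * Illustrative sketch (not the driver's invalidation routine): packing a range
 * invalidation control word from the shift definitions above. Only the
 * MMU_RANGE_INV_* shifts come from this file; the helper name is hypothetical.
 */
static inline u32 gaudi2_example_mmu_range_inv_ctrl(u32 asid)
{
	/* enable range invalidation, qualify it by ASID and encode the ASID itself */
	return (1 << MMU_RANGE_INV_EN_SHIFT) |
		(1 << MMU_RANGE_INV_ASID_EN_SHIFT) |
		(asid << MMU_RANGE_INV_ASID_SHIFT);
}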
122
123 /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in the PMMU because it
124 * has only a 2-entry FIFO, and hence it is not enabled for it.
125 */
126 #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0)
127 #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0)
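
/*
 * Worked values: with GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE = 19, the HMMU mask is
 * GENMASK(18, 0) = 0x7FFFF (all 19 causes enabled) while the PMMU mask is
 * GENMASK(17, 0) = 0x3FFFF (the last cause, "burst_fifo_full", left disabled).
 */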
128
129 #define GAUDI2_MAX_STRING_LEN 64
130
131 #define GAUDI2_VDEC_MSIX_ENTRIES (GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
132 GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
133
134 #define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
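
/*
 * Example of the intended arithmetic (illustration only): engine IDs are laid out so
 * that the same engine in dcore N is reached by adding N strides, e.g.
 *
 *	eng_id = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 + dcore_id * ENGINE_ID_DCORE_OFFSET;
 */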
135
136 /* RAZWI initiator coordinates */
137 #define RAZWI_GET_AXUSER_XY(x) \
138 ((x & 0xF8001FF0) >> 4)
139
140 #define RAZWI_GET_AXUSER_LOW_XY(x) \
141 ((x & 0x00001FF0) >> 4)
142
143 #define RAZWI_INITIATOR_AXUER_L_X_SHIFT 0
144 #define RAZWI_INITIATOR_AXUER_L_X_MASK 0x1F
145 #define RAZWI_INITIATOR_AXUER_L_Y_SHIFT 5
146 #define RAZWI_INITIATOR_AXUER_L_Y_MASK 0xF
147
148 #define RAZWI_INITIATOR_AXUER_H_X_SHIFT 23
149 #define RAZWI_INITIATOR_AXUER_H_X_MASK 0x1F
150
151 #define RAZWI_INITIATOR_ID_X_Y_LOW(x, y) \
152 ((((y) & RAZWI_INITIATOR_AXUER_L_Y_MASK) << RAZWI_INITIATOR_AXUER_L_Y_SHIFT) | \
153 (((x) & RAZWI_INITIATOR_AXUER_L_X_MASK) << RAZWI_INITIATOR_AXUER_L_X_SHIFT))
154
155 #define RAZWI_INITIATOR_ID_X_HIGH(x) \
156 (((x) & RAZWI_INITIATOR_AXUER_H_X_MASK) << RAZWI_INITIATOR_AXUER_H_X_SHIFT)
157
158 #define RAZWI_INITIATOR_ID_X_Y(xl, yl, xh) \
159 (RAZWI_INITIATOR_ID_X_Y_LOW(xl, yl) | RAZWI_INITIATOR_ID_X_HIGH(xh))
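
/*
 * Worked example (illustration only): RAZWI_INITIATOR_ID_X_Y(2, 4, 0) packs the low
 * X/Y pair and the high X coordinate into a single compare value:
 *	low  = ((4 & 0xF) << 5) | (2 & 0x1F) = 0x82
 *	high = (0 & 0x1F) << 23              = 0x0
 * so the corresponding table entry below stores 0x82, which can be compared against
 * the value extracted from the AXUSER register by RAZWI_GET_AXUSER_XY().
 */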
160
161 #define PSOC_RAZWI_ENG_STR_SIZE 128
162 #define PSOC_RAZWI_MAX_ENG_PER_RTR 5
163
164 /* HW scrambles only bits 0-25 */
165 #define HW_UNSCRAMBLED_BITS_MASK GENMASK_ULL(63, 26)
166
167 #define GAUDI2_GLBL_ERR_MAX_CAUSE_NUM 17
168
169 struct gaudi2_razwi_info {
170 u32 axuser_xy;
171 u32 rtr_ctrl;
172 u16 eng_id;
173 char *eng_name;
174 };
175
176 static struct gaudi2_razwi_info common_razwi_info[] = {
177 {RAZWI_INITIATOR_ID_X_Y(2, 4, 0), mmDCORE0_RTR0_CTRL_BASE,
178 GAUDI2_DCORE0_ENGINE_ID_DEC_0, "DEC0"},
179 {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
180 GAUDI2_DCORE0_ENGINE_ID_DEC_1, "DEC1"},
181 {RAZWI_INITIATOR_ID_X_Y(17, 4, 18), mmDCORE1_RTR7_CTRL_BASE,
182 GAUDI2_DCORE1_ENGINE_ID_DEC_0, "DEC2"},
183 {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
184 GAUDI2_DCORE1_ENGINE_ID_DEC_1, "DEC3"},
185 {RAZWI_INITIATOR_ID_X_Y(2, 11, 0), mmDCORE2_RTR0_CTRL_BASE,
186 GAUDI2_DCORE2_ENGINE_ID_DEC_0, "DEC4"},
187 {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
188 GAUDI2_DCORE2_ENGINE_ID_DEC_1, "DEC5"},
189 {RAZWI_INITIATOR_ID_X_Y(17, 11, 18), mmDCORE3_RTR7_CTRL_BASE,
190 GAUDI2_DCORE3_ENGINE_ID_DEC_0, "DEC6"},
191 {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
192 GAUDI2_DCORE3_ENGINE_ID_DEC_1, "DEC7"},
193 {RAZWI_INITIATOR_ID_X_Y(2, 4, 6), mmDCORE0_RTR0_CTRL_BASE,
194 GAUDI2_PCIE_ENGINE_ID_DEC_0, "DEC8"},
195 {RAZWI_INITIATOR_ID_X_Y(2, 4, 7), mmDCORE0_RTR0_CTRL_BASE,
196 GAUDI2_PCIE_ENGINE_ID_DEC_1, "DEC9"},
197 {RAZWI_INITIATOR_ID_X_Y(3, 4, 2), mmDCORE0_RTR1_CTRL_BASE,
198 GAUDI2_DCORE0_ENGINE_ID_TPC_0, "TPC0"},
199 {RAZWI_INITIATOR_ID_X_Y(3, 4, 4), mmDCORE0_RTR1_CTRL_BASE,
200 GAUDI2_DCORE0_ENGINE_ID_TPC_1, "TPC1"},
201 {RAZWI_INITIATOR_ID_X_Y(4, 4, 2), mmDCORE0_RTR2_CTRL_BASE,
202 GAUDI2_DCORE0_ENGINE_ID_TPC_2, "TPC2"},
203 {RAZWI_INITIATOR_ID_X_Y(4, 4, 4), mmDCORE0_RTR2_CTRL_BASE,
204 GAUDI2_DCORE0_ENGINE_ID_TPC_3, "TPC3"},
205 {RAZWI_INITIATOR_ID_X_Y(5, 4, 2), mmDCORE0_RTR3_CTRL_BASE,
206 GAUDI2_DCORE0_ENGINE_ID_TPC_4, "TPC4"},
207 {RAZWI_INITIATOR_ID_X_Y(5, 4, 4), mmDCORE0_RTR3_CTRL_BASE,
208 GAUDI2_DCORE0_ENGINE_ID_TPC_5, "TPC5"},
209 {RAZWI_INITIATOR_ID_X_Y(16, 4, 14), mmDCORE1_RTR6_CTRL_BASE,
210 GAUDI2_DCORE1_ENGINE_ID_TPC_0, "TPC6"},
211 {RAZWI_INITIATOR_ID_X_Y(16, 4, 16), mmDCORE1_RTR6_CTRL_BASE,
212 GAUDI2_DCORE1_ENGINE_ID_TPC_1, "TPC7"},
213 {RAZWI_INITIATOR_ID_X_Y(15, 4, 14), mmDCORE1_RTR5_CTRL_BASE,
214 GAUDI2_DCORE1_ENGINE_ID_TPC_2, "TPC8"},
215 {RAZWI_INITIATOR_ID_X_Y(15, 4, 16), mmDCORE1_RTR5_CTRL_BASE,
216 GAUDI2_DCORE1_ENGINE_ID_TPC_3, "TPC9"},
217 {RAZWI_INITIATOR_ID_X_Y(14, 4, 14), mmDCORE1_RTR4_CTRL_BASE,
218 GAUDI2_DCORE1_ENGINE_ID_TPC_4, "TPC10"},
219 {RAZWI_INITIATOR_ID_X_Y(14, 4, 16), mmDCORE1_RTR4_CTRL_BASE,
220 GAUDI2_DCORE1_ENGINE_ID_TPC_5, "TPC11"},
221 {RAZWI_INITIATOR_ID_X_Y(5, 11, 2), mmDCORE2_RTR3_CTRL_BASE,
222 GAUDI2_DCORE2_ENGINE_ID_TPC_0, "TPC12"},
223 {RAZWI_INITIATOR_ID_X_Y(5, 11, 4), mmDCORE2_RTR3_CTRL_BASE,
224 GAUDI2_DCORE2_ENGINE_ID_TPC_1, "TPC13"},
225 {RAZWI_INITIATOR_ID_X_Y(4, 11, 2), mmDCORE2_RTR2_CTRL_BASE,
226 GAUDI2_DCORE2_ENGINE_ID_TPC_2, "TPC14"},
227 {RAZWI_INITIATOR_ID_X_Y(4, 11, 4), mmDCORE2_RTR2_CTRL_BASE,
228 GAUDI2_DCORE2_ENGINE_ID_TPC_3, "TPC15"},
229 {RAZWI_INITIATOR_ID_X_Y(3, 11, 2), mmDCORE2_RTR1_CTRL_BASE,
230 GAUDI2_DCORE2_ENGINE_ID_TPC_4, "TPC16"},
231 {RAZWI_INITIATOR_ID_X_Y(3, 11, 4), mmDCORE2_RTR1_CTRL_BASE,
232 GAUDI2_DCORE2_ENGINE_ID_TPC_5, "TPC17"},
233 {RAZWI_INITIATOR_ID_X_Y(14, 11, 14), mmDCORE3_RTR4_CTRL_BASE,
234 GAUDI2_DCORE3_ENGINE_ID_TPC_0, "TPC18"},
235 {RAZWI_INITIATOR_ID_X_Y(14, 11, 16), mmDCORE3_RTR4_CTRL_BASE,
236 GAUDI2_DCORE3_ENGINE_ID_TPC_1, "TPC19"},
237 {RAZWI_INITIATOR_ID_X_Y(15, 11, 14), mmDCORE3_RTR5_CTRL_BASE,
238 GAUDI2_DCORE3_ENGINE_ID_TPC_2, "TPC20"},
239 {RAZWI_INITIATOR_ID_X_Y(15, 11, 16), mmDCORE3_RTR5_CTRL_BASE,
240 GAUDI2_DCORE3_ENGINE_ID_TPC_3, "TPC21"},
241 {RAZWI_INITIATOR_ID_X_Y(16, 11, 14), mmDCORE3_RTR6_CTRL_BASE,
242 GAUDI2_DCORE3_ENGINE_ID_TPC_4, "TPC22"},
243 {RAZWI_INITIATOR_ID_X_Y(16, 11, 16), mmDCORE3_RTR6_CTRL_BASE,
244 GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC23"},
245 {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
246 GAUDI2_DCORE3_ENGINE_ID_TPC_5, "TPC24"},
247 {RAZWI_INITIATOR_ID_X_Y(17, 4, 8), mmDCORE1_RTR7_CTRL_BASE,
248 GAUDI2_ENGINE_ID_NIC0_0, "NIC0"},
249 {RAZWI_INITIATOR_ID_X_Y(17, 4, 10), mmDCORE1_RTR7_CTRL_BASE,
250 GAUDI2_ENGINE_ID_NIC0_1, "NIC1"},
251 {RAZWI_INITIATOR_ID_X_Y(17, 4, 12), mmDCORE1_RTR7_CTRL_BASE,
252 GAUDI2_ENGINE_ID_NIC1_0, "NIC2"},
253 {RAZWI_INITIATOR_ID_X_Y(17, 4, 14), mmDCORE1_RTR7_CTRL_BASE,
254 GAUDI2_ENGINE_ID_NIC1_1, "NIC3"},
255 {RAZWI_INITIATOR_ID_X_Y(17, 4, 15), mmDCORE1_RTR7_CTRL_BASE,
256 GAUDI2_ENGINE_ID_NIC2_0, "NIC4"},
257 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
258 GAUDI2_ENGINE_ID_NIC2_1, "NIC5"},
259 {RAZWI_INITIATOR_ID_X_Y(2, 11, 4), mmDCORE2_RTR0_CTRL_BASE,
260 GAUDI2_ENGINE_ID_NIC3_0, "NIC6"},
261 {RAZWI_INITIATOR_ID_X_Y(2, 11, 6), mmDCORE2_RTR0_CTRL_BASE,
262 GAUDI2_ENGINE_ID_NIC3_1, "NIC7"},
263 {RAZWI_INITIATOR_ID_X_Y(2, 11, 8), mmDCORE2_RTR0_CTRL_BASE,
264 GAUDI2_ENGINE_ID_NIC4_0, "NIC8"},
265 {RAZWI_INITIATOR_ID_X_Y(17, 11, 12), mmDCORE3_RTR7_CTRL_BASE,
266 GAUDI2_ENGINE_ID_NIC4_1, "NIC9"},
267 {RAZWI_INITIATOR_ID_X_Y(17, 11, 14), mmDCORE3_RTR7_CTRL_BASE,
268 GAUDI2_ENGINE_ID_NIC5_0, "NIC10"},
269 {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
270 GAUDI2_ENGINE_ID_NIC5_1, "NIC11"},
271 {RAZWI_INITIATOR_ID_X_Y(2, 4, 2), mmDCORE0_RTR0_CTRL_BASE,
272 GAUDI2_ENGINE_ID_PDMA_0, "PDMA0"},
273 {RAZWI_INITIATOR_ID_X_Y(2, 4, 3), mmDCORE0_RTR0_CTRL_BASE,
274 GAUDI2_ENGINE_ID_PDMA_1, "PDMA1"},
275 {RAZWI_INITIATOR_ID_X_Y(2, 4, 4), mmDCORE0_RTR0_CTRL_BASE,
276 GAUDI2_ENGINE_ID_SIZE, "PMMU"},
277 {RAZWI_INITIATOR_ID_X_Y(2, 4, 5), mmDCORE0_RTR0_CTRL_BASE,
278 GAUDI2_ENGINE_ID_SIZE, "PCIE"},
279 {RAZWI_INITIATOR_ID_X_Y(17, 4, 16), mmDCORE1_RTR7_CTRL_BASE,
280 GAUDI2_ENGINE_ID_ARC_FARM, "ARC_FARM"},
281 {RAZWI_INITIATOR_ID_X_Y(17, 4, 17), mmDCORE1_RTR7_CTRL_BASE,
282 GAUDI2_ENGINE_ID_KDMA, "KDMA"},
283 {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF1_RTR_CTRL_BASE,
284 GAUDI2_DCORE0_ENGINE_ID_EDMA_0, "EDMA0"},
285 {RAZWI_INITIATOR_ID_X_Y(1, 5, 1), mmSFT0_HBW_RTR_IF0_RTR_CTRL_BASE,
286 GAUDI2_DCORE0_ENGINE_ID_EDMA_1, "EDMA1"},
287 {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF1_RTR_CTRL_BASE,
288 GAUDI2_DCORE1_ENGINE_ID_EDMA_0, "EDMA2"},
289 {RAZWI_INITIATOR_ID_X_Y(18, 5, 18), mmSFT1_HBW_RTR_IF0_RTR_CTRL_BASE,
290 GAUDI2_DCORE1_ENGINE_ID_EDMA_1, "EDMA3"},
291 {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF0_RTR_CTRL_BASE,
292 GAUDI2_DCORE2_ENGINE_ID_EDMA_0, "EDMA4"},
293 {RAZWI_INITIATOR_ID_X_Y(1, 10, 1), mmSFT2_HBW_RTR_IF1_RTR_CTRL_BASE,
294 GAUDI2_DCORE2_ENGINE_ID_EDMA_1, "EDMA5"},
295 {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF0_RTR_CTRL_BASE,
296 GAUDI2_DCORE3_ENGINE_ID_EDMA_0, "EDMA6"},
297 {RAZWI_INITIATOR_ID_X_Y(18, 10, 18), mmSFT3_HBW_RTR_IF1_RTR_CTRL_BASE,
298 GAUDI2_DCORE3_ENGINE_ID_EDMA_1, "EDMA7"},
299 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
300 GAUDI2_ENGINE_ID_SIZE, "HMMU0"},
301 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
302 GAUDI2_ENGINE_ID_SIZE, "HMMU1"},
303 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
304 GAUDI2_ENGINE_ID_SIZE, "HMMU2"},
305 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
306 GAUDI2_ENGINE_ID_SIZE, "HMMU3"},
307 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
308 GAUDI2_ENGINE_ID_SIZE, "HMMU4"},
309 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
310 GAUDI2_ENGINE_ID_SIZE, "HMMU5"},
311 {RAZWI_INITIATOR_ID_X_Y(1, 5, 0), mmDCORE0_RTR0_CTRL_BASE,
312 GAUDI2_ENGINE_ID_SIZE, "HMMU6"},
313 {RAZWI_INITIATOR_ID_X_Y(18, 5, 19), mmDCORE1_RTR7_CTRL_BASE,
314 GAUDI2_ENGINE_ID_SIZE, "HMMU7"},
315 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
316 GAUDI2_ENGINE_ID_SIZE, "HMMU8"},
317 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
318 GAUDI2_ENGINE_ID_SIZE, "HMMU9"},
319 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
320 GAUDI2_ENGINE_ID_SIZE, "HMMU10"},
321 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
322 GAUDI2_ENGINE_ID_SIZE, "HMMU11"},
323 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
324 GAUDI2_ENGINE_ID_SIZE, "HMMU12"},
325 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
326 GAUDI2_ENGINE_ID_SIZE, "HMMU13"},
327 {RAZWI_INITIATOR_ID_X_Y(1, 10, 0), mmDCORE2_RTR0_CTRL_BASE,
328 GAUDI2_ENGINE_ID_SIZE, "HMMU14"},
329 {RAZWI_INITIATOR_ID_X_Y(18, 10, 19), mmDCORE3_RTR7_CTRL_BASE,
330 GAUDI2_ENGINE_ID_SIZE, "HMMU15"},
331 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
332 GAUDI2_ENGINE_ID_ROT_0, "ROT0"},
333 {RAZWI_INITIATOR_ID_X_Y(17, 11, 16), mmDCORE3_RTR7_CTRL_BASE,
334 GAUDI2_ENGINE_ID_ROT_1, "ROT1"},
335 {RAZWI_INITIATOR_ID_X_Y(2, 11, 2), mmDCORE2_RTR0_CTRL_BASE,
336 GAUDI2_ENGINE_ID_PSOC, "CPU"},
337 {RAZWI_INITIATOR_ID_X_Y(17, 11, 11), mmDCORE3_RTR7_CTRL_BASE,
338 GAUDI2_ENGINE_ID_PSOC, "PSOC"}
339 };
340
341 static struct gaudi2_razwi_info mme_razwi_info[] = {
342 /* The MME X high coordinate is N/A, hence only the low coordinates are used */
343 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
344 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP0"},
345 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
346 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_WAP1"},
347 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
348 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_WR"},
349 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
350 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_CTRL_RD"},
351 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
352 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE0"},
353 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 4), mmDCORE0_RTR4_CTRL_BASE,
354 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE1"},
355 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 4), mmDCORE0_RTR5_CTRL_BASE,
356 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE2"},
357 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 4), mmDCORE0_RTR6_CTRL_BASE,
358 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE3"},
359 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 4), mmDCORE0_RTR7_CTRL_BASE,
360 GAUDI2_DCORE0_ENGINE_ID_MME, "MME0_SBTE4"},
361 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
362 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP0"},
363 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
364 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_WAP1"},
365 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
366 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_WR"},
367 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
368 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_CTRL_RD"},
369 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
370 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE0"},
371 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 4), mmDCORE1_RTR3_CTRL_BASE,
372 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE1"},
373 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 4), mmDCORE1_RTR2_CTRL_BASE,
374 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE2"},
375 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 4), mmDCORE1_RTR1_CTRL_BASE,
376 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE3"},
377 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 4), mmDCORE1_RTR0_CTRL_BASE,
378 GAUDI2_DCORE1_ENGINE_ID_MME, "MME1_SBTE4"},
379 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
380 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP0"},
381 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
382 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_WAP1"},
383 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
384 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_WR"},
385 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
386 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_CTRL_RD"},
387 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
388 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE0"},
389 {RAZWI_INITIATOR_ID_X_Y_LOW(6, 11), mmDCORE2_RTR4_CTRL_BASE,
390 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE1"},
391 {RAZWI_INITIATOR_ID_X_Y_LOW(7, 11), mmDCORE2_RTR5_CTRL_BASE,
392 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE2"},
393 {RAZWI_INITIATOR_ID_X_Y_LOW(8, 11), mmDCORE2_RTR6_CTRL_BASE,
394 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE3"},
395 {RAZWI_INITIATOR_ID_X_Y_LOW(9, 11), mmDCORE2_RTR7_CTRL_BASE,
396 GAUDI2_DCORE2_ENGINE_ID_MME, "MME2_SBTE4"},
397 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
398 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP0"},
399 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
400 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_WAP1"},
401 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
402 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_WR"},
403 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
404 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_CTRL_RD"},
405 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
406 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE0"},
407 {RAZWI_INITIATOR_ID_X_Y_LOW(13, 11), mmDCORE3_RTR3_CTRL_BASE,
408 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE1"},
409 {RAZWI_INITIATOR_ID_X_Y_LOW(12, 11), mmDCORE3_RTR2_CTRL_BASE,
410 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE2"},
411 {RAZWI_INITIATOR_ID_X_Y_LOW(11, 11), mmDCORE3_RTR1_CTRL_BASE,
412 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE3"},
413 {RAZWI_INITIATOR_ID_X_Y_LOW(10, 11), mmDCORE3_RTR0_CTRL_BASE,
414 GAUDI2_DCORE3_ENGINE_ID_MME, "MME3_SBTE4"}
415 };
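
/*
 * Lookup sketch (hypothetical helper, not the driver's reporting path): given an
 * AXUSER value read from a router, the initiator tables above are scanned for a
 * matching coordinate. MME initiators expose only their low X/Y pair, so they are
 * matched with RAZWI_GET_AXUSER_LOW_XY() instead.
 */
static inline const char *gaudi2_example_razwi_initiator_name(u32 axuser)
{
	u32 xy = RAZWI_GET_AXUSER_XY(axuser);
	u32 low_xy = RAZWI_GET_AXUSER_LOW_XY(axuser);
	int i;

	/* full X/Y match for the common (non-MME) initiators */
	for (i = 0 ; i < ARRAY_SIZE(common_razwi_info) ; i++)
		if (common_razwi_info[i].axuser_xy == xy)
			return common_razwi_info[i].eng_name;

	/* MME initiators are identified by their low coordinates only */
	for (i = 0 ; i < ARRAY_SIZE(mme_razwi_info) ; i++)
		if (mme_razwi_info[i].axuser_xy == low_xy)
			return mme_razwi_info[i].eng_name;

	return NULL;
}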
416
417 enum hl_pmmu_fatal_cause {
418 LATENCY_RD_OUT_FIFO_OVERRUN,
419 LATENCY_WR_OUT_FIFO_OVERRUN,
420 };
421
422 enum hl_pcie_drain_ind_cause {
423 LBW_AXI_DRAIN_IND,
424 HBW_AXI_DRAIN_IND
425 };
426
427 static const u32 cluster_hmmu_hif_enabled_mask[GAUDI2_HBM_NUM] = {
428 [HBM_ID0] = 0xFFFC,
429 [HBM_ID1] = 0xFFCF,
430 [HBM_ID2] = 0xF7F7,
431 [HBM_ID3] = 0x7F7F,
432 [HBM_ID4] = 0xFCFF,
433 [HBM_ID5] = 0xCFFF,
434 };
435
436 static const u8 xbar_edge_to_hbm_cluster[EDMA_ID_SIZE] = {
437 [0] = HBM_ID0,
438 [1] = HBM_ID1,
439 [2] = HBM_ID4,
440 [3] = HBM_ID5,
441 };
442
443 static const u8 edma_to_hbm_cluster[EDMA_ID_SIZE] = {
444 [EDMA_ID_DCORE0_INSTANCE0] = HBM_ID0,
445 [EDMA_ID_DCORE0_INSTANCE1] = HBM_ID2,
446 [EDMA_ID_DCORE1_INSTANCE0] = HBM_ID1,
447 [EDMA_ID_DCORE1_INSTANCE1] = HBM_ID3,
448 [EDMA_ID_DCORE2_INSTANCE0] = HBM_ID2,
449 [EDMA_ID_DCORE2_INSTANCE1] = HBM_ID4,
450 [EDMA_ID_DCORE3_INSTANCE0] = HBM_ID3,
451 [EDMA_ID_DCORE3_INSTANCE1] = HBM_ID5,
452 };
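
/*
 * Example lookup (illustration only): the tables above let binning code map a faulty
 * EDMA instance, or an XBAR edge, to its associated HBM cluster, e.g.
 *
 *	u8 cluster = edma_to_hbm_cluster[EDMA_ID_DCORE2_INSTANCE1];	(== HBM_ID4)
 */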
453
454 static const int gaudi2_qman_async_event_id[] = {
455 [GAUDI2_QUEUE_ID_PDMA_0_0] = GAUDI2_EVENT_PDMA0_QM,
456 [GAUDI2_QUEUE_ID_PDMA_0_1] = GAUDI2_EVENT_PDMA0_QM,
457 [GAUDI2_QUEUE_ID_PDMA_0_2] = GAUDI2_EVENT_PDMA0_QM,
458 [GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_EVENT_PDMA0_QM,
459 [GAUDI2_QUEUE_ID_PDMA_1_0] = GAUDI2_EVENT_PDMA1_QM,
460 [GAUDI2_QUEUE_ID_PDMA_1_1] = GAUDI2_EVENT_PDMA1_QM,
461 [GAUDI2_QUEUE_ID_PDMA_1_2] = GAUDI2_EVENT_PDMA1_QM,
462 [GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_EVENT_PDMA1_QM,
463 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = GAUDI2_EVENT_HDMA0_QM,
464 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = GAUDI2_EVENT_HDMA0_QM,
465 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = GAUDI2_EVENT_HDMA0_QM,
466 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = GAUDI2_EVENT_HDMA0_QM,
467 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = GAUDI2_EVENT_HDMA1_QM,
468 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = GAUDI2_EVENT_HDMA1_QM,
469 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = GAUDI2_EVENT_HDMA1_QM,
470 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = GAUDI2_EVENT_HDMA1_QM,
471 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = GAUDI2_EVENT_MME0_QM,
472 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = GAUDI2_EVENT_MME0_QM,
473 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = GAUDI2_EVENT_MME0_QM,
474 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = GAUDI2_EVENT_MME0_QM,
475 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = GAUDI2_EVENT_TPC0_QM,
476 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = GAUDI2_EVENT_TPC0_QM,
477 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = GAUDI2_EVENT_TPC0_QM,
478 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = GAUDI2_EVENT_TPC0_QM,
479 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = GAUDI2_EVENT_TPC1_QM,
480 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = GAUDI2_EVENT_TPC1_QM,
481 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = GAUDI2_EVENT_TPC1_QM,
482 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = GAUDI2_EVENT_TPC1_QM,
483 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = GAUDI2_EVENT_TPC2_QM,
484 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = GAUDI2_EVENT_TPC2_QM,
485 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = GAUDI2_EVENT_TPC2_QM,
486 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = GAUDI2_EVENT_TPC2_QM,
487 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = GAUDI2_EVENT_TPC3_QM,
488 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = GAUDI2_EVENT_TPC3_QM,
489 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = GAUDI2_EVENT_TPC3_QM,
490 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = GAUDI2_EVENT_TPC3_QM,
491 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = GAUDI2_EVENT_TPC4_QM,
492 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = GAUDI2_EVENT_TPC4_QM,
493 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = GAUDI2_EVENT_TPC4_QM,
494 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = GAUDI2_EVENT_TPC4_QM,
495 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = GAUDI2_EVENT_TPC5_QM,
496 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = GAUDI2_EVENT_TPC5_QM,
497 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = GAUDI2_EVENT_TPC5_QM,
498 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = GAUDI2_EVENT_TPC5_QM,
499 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = GAUDI2_EVENT_TPC24_QM,
500 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = GAUDI2_EVENT_TPC24_QM,
501 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = GAUDI2_EVENT_TPC24_QM,
502 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = GAUDI2_EVENT_TPC24_QM,
503 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = GAUDI2_EVENT_HDMA2_QM,
504 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = GAUDI2_EVENT_HDMA2_QM,
505 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = GAUDI2_EVENT_HDMA2_QM,
506 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = GAUDI2_EVENT_HDMA2_QM,
507 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = GAUDI2_EVENT_HDMA3_QM,
508 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = GAUDI2_EVENT_HDMA3_QM,
509 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = GAUDI2_EVENT_HDMA3_QM,
510 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = GAUDI2_EVENT_HDMA3_QM,
511 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = GAUDI2_EVENT_MME1_QM,
512 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = GAUDI2_EVENT_MME1_QM,
513 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = GAUDI2_EVENT_MME1_QM,
514 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = GAUDI2_EVENT_MME1_QM,
515 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = GAUDI2_EVENT_TPC6_QM,
516 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = GAUDI2_EVENT_TPC6_QM,
517 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = GAUDI2_EVENT_TPC6_QM,
518 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = GAUDI2_EVENT_TPC6_QM,
519 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = GAUDI2_EVENT_TPC7_QM,
520 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = GAUDI2_EVENT_TPC7_QM,
521 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = GAUDI2_EVENT_TPC7_QM,
522 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = GAUDI2_EVENT_TPC7_QM,
523 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = GAUDI2_EVENT_TPC8_QM,
524 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = GAUDI2_EVENT_TPC8_QM,
525 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = GAUDI2_EVENT_TPC8_QM,
526 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = GAUDI2_EVENT_TPC8_QM,
527 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = GAUDI2_EVENT_TPC9_QM,
528 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = GAUDI2_EVENT_TPC9_QM,
529 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = GAUDI2_EVENT_TPC9_QM,
530 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = GAUDI2_EVENT_TPC9_QM,
531 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = GAUDI2_EVENT_TPC10_QM,
532 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = GAUDI2_EVENT_TPC10_QM,
533 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = GAUDI2_EVENT_TPC10_QM,
534 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = GAUDI2_EVENT_TPC10_QM,
535 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = GAUDI2_EVENT_TPC11_QM,
536 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = GAUDI2_EVENT_TPC11_QM,
537 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = GAUDI2_EVENT_TPC11_QM,
538 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = GAUDI2_EVENT_TPC11_QM,
539 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = GAUDI2_EVENT_HDMA4_QM,
540 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = GAUDI2_EVENT_HDMA4_QM,
541 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = GAUDI2_EVENT_HDMA4_QM,
542 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = GAUDI2_EVENT_HDMA4_QM,
543 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = GAUDI2_EVENT_HDMA5_QM,
544 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = GAUDI2_EVENT_HDMA5_QM,
545 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = GAUDI2_EVENT_HDMA5_QM,
546 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = GAUDI2_EVENT_HDMA5_QM,
547 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = GAUDI2_EVENT_MME2_QM,
548 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = GAUDI2_EVENT_MME2_QM,
549 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = GAUDI2_EVENT_MME2_QM,
550 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = GAUDI2_EVENT_MME2_QM,
551 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = GAUDI2_EVENT_TPC12_QM,
552 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = GAUDI2_EVENT_TPC12_QM,
553 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = GAUDI2_EVENT_TPC12_QM,
554 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = GAUDI2_EVENT_TPC12_QM,
555 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = GAUDI2_EVENT_TPC13_QM,
556 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = GAUDI2_EVENT_TPC13_QM,
557 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = GAUDI2_EVENT_TPC13_QM,
558 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = GAUDI2_EVENT_TPC13_QM,
559 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = GAUDI2_EVENT_TPC14_QM,
560 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = GAUDI2_EVENT_TPC14_QM,
561 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = GAUDI2_EVENT_TPC14_QM,
562 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = GAUDI2_EVENT_TPC14_QM,
563 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = GAUDI2_EVENT_TPC15_QM,
564 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = GAUDI2_EVENT_TPC15_QM,
565 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = GAUDI2_EVENT_TPC15_QM,
566 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = GAUDI2_EVENT_TPC15_QM,
567 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = GAUDI2_EVENT_TPC16_QM,
568 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = GAUDI2_EVENT_TPC16_QM,
569 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = GAUDI2_EVENT_TPC16_QM,
570 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = GAUDI2_EVENT_TPC16_QM,
571 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = GAUDI2_EVENT_TPC17_QM,
572 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = GAUDI2_EVENT_TPC17_QM,
573 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = GAUDI2_EVENT_TPC17_QM,
574 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = GAUDI2_EVENT_TPC17_QM,
575 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = GAUDI2_EVENT_HDMA6_QM,
576 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = GAUDI2_EVENT_HDMA6_QM,
577 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = GAUDI2_EVENT_HDMA6_QM,
578 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = GAUDI2_EVENT_HDMA6_QM,
579 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = GAUDI2_EVENT_HDMA7_QM,
580 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = GAUDI2_EVENT_HDMA7_QM,
581 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = GAUDI2_EVENT_HDMA7_QM,
582 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = GAUDI2_EVENT_HDMA7_QM,
583 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = GAUDI2_EVENT_MME3_QM,
584 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = GAUDI2_EVENT_MME3_QM,
585 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = GAUDI2_EVENT_MME3_QM,
586 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = GAUDI2_EVENT_MME3_QM,
587 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = GAUDI2_EVENT_TPC18_QM,
588 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = GAUDI2_EVENT_TPC18_QM,
589 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = GAUDI2_EVENT_TPC18_QM,
590 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = GAUDI2_EVENT_TPC18_QM,
591 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = GAUDI2_EVENT_TPC19_QM,
592 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = GAUDI2_EVENT_TPC19_QM,
593 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = GAUDI2_EVENT_TPC19_QM,
594 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = GAUDI2_EVENT_TPC19_QM,
595 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = GAUDI2_EVENT_TPC20_QM,
596 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = GAUDI2_EVENT_TPC20_QM,
597 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = GAUDI2_EVENT_TPC20_QM,
598 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = GAUDI2_EVENT_TPC20_QM,
599 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = GAUDI2_EVENT_TPC21_QM,
600 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = GAUDI2_EVENT_TPC21_QM,
601 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = GAUDI2_EVENT_TPC21_QM,
602 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = GAUDI2_EVENT_TPC21_QM,
603 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = GAUDI2_EVENT_TPC22_QM,
604 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = GAUDI2_EVENT_TPC22_QM,
605 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = GAUDI2_EVENT_TPC22_QM,
606 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = GAUDI2_EVENT_TPC22_QM,
607 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = GAUDI2_EVENT_TPC23_QM,
608 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = GAUDI2_EVENT_TPC23_QM,
609 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = GAUDI2_EVENT_TPC23_QM,
610 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = GAUDI2_EVENT_TPC23_QM,
611 [GAUDI2_QUEUE_ID_NIC_0_0] = GAUDI2_EVENT_NIC0_QM0,
612 [GAUDI2_QUEUE_ID_NIC_0_1] = GAUDI2_EVENT_NIC0_QM0,
613 [GAUDI2_QUEUE_ID_NIC_0_2] = GAUDI2_EVENT_NIC0_QM0,
614 [GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_EVENT_NIC0_QM0,
615 [GAUDI2_QUEUE_ID_NIC_1_0] = GAUDI2_EVENT_NIC0_QM1,
616 [GAUDI2_QUEUE_ID_NIC_1_1] = GAUDI2_EVENT_NIC0_QM1,
617 [GAUDI2_QUEUE_ID_NIC_1_2] = GAUDI2_EVENT_NIC0_QM1,
618 [GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_EVENT_NIC0_QM1,
619 [GAUDI2_QUEUE_ID_NIC_2_0] = GAUDI2_EVENT_NIC1_QM0,
620 [GAUDI2_QUEUE_ID_NIC_2_1] = GAUDI2_EVENT_NIC1_QM0,
621 [GAUDI2_QUEUE_ID_NIC_2_2] = GAUDI2_EVENT_NIC1_QM0,
622 [GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_EVENT_NIC1_QM0,
623 [GAUDI2_QUEUE_ID_NIC_3_0] = GAUDI2_EVENT_NIC1_QM1,
624 [GAUDI2_QUEUE_ID_NIC_3_1] = GAUDI2_EVENT_NIC1_QM1,
625 [GAUDI2_QUEUE_ID_NIC_3_2] = GAUDI2_EVENT_NIC1_QM1,
626 [GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_EVENT_NIC1_QM1,
627 [GAUDI2_QUEUE_ID_NIC_4_0] = GAUDI2_EVENT_NIC2_QM0,
628 [GAUDI2_QUEUE_ID_NIC_4_1] = GAUDI2_EVENT_NIC2_QM0,
629 [GAUDI2_QUEUE_ID_NIC_4_2] = GAUDI2_EVENT_NIC2_QM0,
630 [GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_EVENT_NIC2_QM0,
631 [GAUDI2_QUEUE_ID_NIC_5_0] = GAUDI2_EVENT_NIC2_QM1,
632 [GAUDI2_QUEUE_ID_NIC_5_1] = GAUDI2_EVENT_NIC2_QM1,
633 [GAUDI2_QUEUE_ID_NIC_5_2] = GAUDI2_EVENT_NIC2_QM1,
634 [GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_EVENT_NIC2_QM1,
635 [GAUDI2_QUEUE_ID_NIC_6_0] = GAUDI2_EVENT_NIC3_QM0,
636 [GAUDI2_QUEUE_ID_NIC_6_1] = GAUDI2_EVENT_NIC3_QM0,
637 [GAUDI2_QUEUE_ID_NIC_6_2] = GAUDI2_EVENT_NIC3_QM0,
638 [GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_EVENT_NIC3_QM0,
639 [GAUDI2_QUEUE_ID_NIC_7_0] = GAUDI2_EVENT_NIC3_QM1,
640 [GAUDI2_QUEUE_ID_NIC_7_1] = GAUDI2_EVENT_NIC3_QM1,
641 [GAUDI2_QUEUE_ID_NIC_7_2] = GAUDI2_EVENT_NIC3_QM1,
642 [GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_EVENT_NIC3_QM1,
643 [GAUDI2_QUEUE_ID_NIC_8_0] = GAUDI2_EVENT_NIC4_QM0,
644 [GAUDI2_QUEUE_ID_NIC_8_1] = GAUDI2_EVENT_NIC4_QM0,
645 [GAUDI2_QUEUE_ID_NIC_8_2] = GAUDI2_EVENT_NIC4_QM0,
646 [GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_EVENT_NIC4_QM0,
647 [GAUDI2_QUEUE_ID_NIC_9_0] = GAUDI2_EVENT_NIC4_QM1,
648 [GAUDI2_QUEUE_ID_NIC_9_1] = GAUDI2_EVENT_NIC4_QM1,
649 [GAUDI2_QUEUE_ID_NIC_9_2] = GAUDI2_EVENT_NIC4_QM1,
650 [GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_EVENT_NIC4_QM1,
651 [GAUDI2_QUEUE_ID_NIC_10_0] = GAUDI2_EVENT_NIC5_QM0,
652 [GAUDI2_QUEUE_ID_NIC_10_1] = GAUDI2_EVENT_NIC5_QM0,
653 [GAUDI2_QUEUE_ID_NIC_10_2] = GAUDI2_EVENT_NIC5_QM0,
654 [GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_EVENT_NIC5_QM0,
655 [GAUDI2_QUEUE_ID_NIC_11_0] = GAUDI2_EVENT_NIC5_QM1,
656 [GAUDI2_QUEUE_ID_NIC_11_1] = GAUDI2_EVENT_NIC5_QM1,
657 [GAUDI2_QUEUE_ID_NIC_11_2] = GAUDI2_EVENT_NIC5_QM1,
658 [GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_EVENT_NIC5_QM1,
659 [GAUDI2_QUEUE_ID_NIC_12_0] = GAUDI2_EVENT_NIC6_QM0,
660 [GAUDI2_QUEUE_ID_NIC_12_1] = GAUDI2_EVENT_NIC6_QM0,
661 [GAUDI2_QUEUE_ID_NIC_12_2] = GAUDI2_EVENT_NIC6_QM0,
662 [GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_EVENT_NIC6_QM0,
663 [GAUDI2_QUEUE_ID_NIC_13_0] = GAUDI2_EVENT_NIC6_QM1,
664 [GAUDI2_QUEUE_ID_NIC_13_1] = GAUDI2_EVENT_NIC6_QM1,
665 [GAUDI2_QUEUE_ID_NIC_13_2] = GAUDI2_EVENT_NIC6_QM1,
666 [GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_EVENT_NIC6_QM1,
667 [GAUDI2_QUEUE_ID_NIC_14_0] = GAUDI2_EVENT_NIC7_QM0,
668 [GAUDI2_QUEUE_ID_NIC_14_1] = GAUDI2_EVENT_NIC7_QM0,
669 [GAUDI2_QUEUE_ID_NIC_14_2] = GAUDI2_EVENT_NIC7_QM0,
670 [GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_EVENT_NIC7_QM0,
671 [GAUDI2_QUEUE_ID_NIC_15_0] = GAUDI2_EVENT_NIC7_QM1,
672 [GAUDI2_QUEUE_ID_NIC_15_1] = GAUDI2_EVENT_NIC7_QM1,
673 [GAUDI2_QUEUE_ID_NIC_15_2] = GAUDI2_EVENT_NIC7_QM1,
674 [GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_EVENT_NIC7_QM1,
675 [GAUDI2_QUEUE_ID_NIC_16_0] = GAUDI2_EVENT_NIC8_QM0,
676 [GAUDI2_QUEUE_ID_NIC_16_1] = GAUDI2_EVENT_NIC8_QM0,
677 [GAUDI2_QUEUE_ID_NIC_16_2] = GAUDI2_EVENT_NIC8_QM0,
678 [GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_EVENT_NIC8_QM0,
679 [GAUDI2_QUEUE_ID_NIC_17_0] = GAUDI2_EVENT_NIC8_QM1,
680 [GAUDI2_QUEUE_ID_NIC_17_1] = GAUDI2_EVENT_NIC8_QM1,
681 [GAUDI2_QUEUE_ID_NIC_17_2] = GAUDI2_EVENT_NIC8_QM1,
682 [GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_EVENT_NIC8_QM1,
683 [GAUDI2_QUEUE_ID_NIC_18_0] = GAUDI2_EVENT_NIC9_QM0,
684 [GAUDI2_QUEUE_ID_NIC_18_1] = GAUDI2_EVENT_NIC9_QM0,
685 [GAUDI2_QUEUE_ID_NIC_18_2] = GAUDI2_EVENT_NIC9_QM0,
686 [GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_EVENT_NIC9_QM0,
687 [GAUDI2_QUEUE_ID_NIC_19_0] = GAUDI2_EVENT_NIC9_QM1,
688 [GAUDI2_QUEUE_ID_NIC_19_1] = GAUDI2_EVENT_NIC9_QM1,
689 [GAUDI2_QUEUE_ID_NIC_19_2] = GAUDI2_EVENT_NIC9_QM1,
690 [GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_EVENT_NIC9_QM1,
691 [GAUDI2_QUEUE_ID_NIC_20_0] = GAUDI2_EVENT_NIC10_QM0,
692 [GAUDI2_QUEUE_ID_NIC_20_1] = GAUDI2_EVENT_NIC10_QM0,
693 [GAUDI2_QUEUE_ID_NIC_20_2] = GAUDI2_EVENT_NIC10_QM0,
694 [GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_EVENT_NIC10_QM0,
695 [GAUDI2_QUEUE_ID_NIC_21_0] = GAUDI2_EVENT_NIC10_QM1,
696 [GAUDI2_QUEUE_ID_NIC_21_1] = GAUDI2_EVENT_NIC10_QM1,
697 [GAUDI2_QUEUE_ID_NIC_21_2] = GAUDI2_EVENT_NIC10_QM1,
698 [GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_EVENT_NIC10_QM1,
699 [GAUDI2_QUEUE_ID_NIC_22_0] = GAUDI2_EVENT_NIC11_QM0,
700 [GAUDI2_QUEUE_ID_NIC_22_1] = GAUDI2_EVENT_NIC11_QM0,
701 [GAUDI2_QUEUE_ID_NIC_22_2] = GAUDI2_EVENT_NIC11_QM0,
702 [GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_EVENT_NIC11_QM0,
703 [GAUDI2_QUEUE_ID_NIC_23_0] = GAUDI2_EVENT_NIC11_QM1,
704 [GAUDI2_QUEUE_ID_NIC_23_1] = GAUDI2_EVENT_NIC11_QM1,
705 [GAUDI2_QUEUE_ID_NIC_23_2] = GAUDI2_EVENT_NIC11_QM1,
706 [GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_EVENT_NIC11_QM1,
707 [GAUDI2_QUEUE_ID_ROT_0_0] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
708 [GAUDI2_QUEUE_ID_ROT_0_1] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
709 [GAUDI2_QUEUE_ID_ROT_0_2] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
710 [GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_EVENT_ROTATOR0_ROT0_QM,
711 [GAUDI2_QUEUE_ID_ROT_1_0] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
712 [GAUDI2_QUEUE_ID_ROT_1_1] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
713 [GAUDI2_QUEUE_ID_ROT_1_2] = GAUDI2_EVENT_ROTATOR1_ROT1_QM,
714 [GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_EVENT_ROTATOR1_ROT1_QM
715 };
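
/*
 * Lookup sketch (hypothetical helper): all four queues of a QMAN share that QMAN's
 * async event ID, so the table above can be indexed directly by queue ID. The helper
 * name and the out-of-range fallback are assumptions made for this example.
 */
static inline int gaudi2_example_queue_async_event_id(u32 queue_id)
{
	if (queue_id >= ARRAY_SIZE(gaudi2_qman_async_event_id))
		return 0;

	return gaudi2_qman_async_event_id[queue_id];
}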
716
717 static const int gaudi2_dma_core_async_event_id[] = {
718 [DMA_CORE_ID_EDMA0] = GAUDI2_EVENT_HDMA0_CORE,
719 [DMA_CORE_ID_EDMA1] = GAUDI2_EVENT_HDMA1_CORE,
720 [DMA_CORE_ID_EDMA2] = GAUDI2_EVENT_HDMA2_CORE,
721 [DMA_CORE_ID_EDMA3] = GAUDI2_EVENT_HDMA3_CORE,
722 [DMA_CORE_ID_EDMA4] = GAUDI2_EVENT_HDMA4_CORE,
723 [DMA_CORE_ID_EDMA5] = GAUDI2_EVENT_HDMA5_CORE,
724 [DMA_CORE_ID_EDMA6] = GAUDI2_EVENT_HDMA6_CORE,
725 [DMA_CORE_ID_EDMA7] = GAUDI2_EVENT_HDMA7_CORE,
726 [DMA_CORE_ID_PDMA0] = GAUDI2_EVENT_PDMA0_CORE,
727 [DMA_CORE_ID_PDMA1] = GAUDI2_EVENT_PDMA1_CORE,
728 [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE,
729 };
730
731 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = {
732 "qman sei intr",
733 "arc sei intr"
734 };
735
736 static const char * const gaudi2_cpu_sei_error_cause[GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE] = {
737 "AXI_TERMINATOR WR",
738 "AXI_TERMINATOR RD",
739 "AXI SPLIT SEI Status"
740 };
741
742 static const char * const gaudi2_arc_sei_error_cause[GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE] = {
743 "cbu_bresp_sei_intr_cause",
744 "cbu_rresp_sei_intr_cause",
745 "lbu_bresp_sei_intr_cause",
746 "lbu_rresp_sei_intr_cause",
747 "cbu_axi_split_intr_cause",
748 "lbu_axi_split_intr_cause",
749 "arc_ip_excptn_sei_intr_cause",
750 "dmi_bresp_sei_intr_cause",
751 "aux2apb_err_sei_intr_cause",
752 "cfg_lbw_wr_terminated_intr_cause",
753 "cfg_lbw_rd_terminated_intr_cause",
754 "cfg_dccm_wr_terminated_intr_cause",
755 "cfg_dccm_rd_terminated_intr_cause",
756 "cfg_hbw_rd_terminated_intr_cause"
757 };
758
759 static const char * const gaudi2_dec_error_cause[GAUDI2_NUM_OF_DEC_ERR_CAUSE] = {
760 "msix_vcd_hbw_sei",
761 "msix_l2c_hbw_sei",
762 "msix_nrm_hbw_sei",
763 "msix_abnrm_hbw_sei",
764 "msix_vcd_lbw_sei",
765 "msix_l2c_lbw_sei",
766 "msix_nrm_lbw_sei",
767 "msix_abnrm_lbw_sei",
768 "apb_vcd_lbw_sei",
769 "apb_l2c_lbw_sei",
770 "apb_nrm_lbw_sei",
771 "apb_abnrm_lbw_sei",
772 "dec_sei",
773 "dec_apb_sei",
774 "trc_apb_sei",
775 "lbw_mstr_if_sei",
776 "axi_split_bresp_err_sei",
777 "hbw_axi_wr_viol_sei",
778 "hbw_axi_rd_viol_sei",
779 "lbw_axi_wr_viol_sei",
780 "lbw_axi_rd_viol_sei",
781 "vcd_spi",
782 "l2c_spi",
783 "nrm_spi",
784 "abnrm_spi",
785 };
786
787 static const char * const gaudi2_qman_error_cause[GAUDI2_NUM_OF_QM_ERR_CAUSE] = {
788 "PQ AXI HBW error",
789 "CQ AXI HBW error",
790 "CP AXI HBW error",
791 "CP error due to undefined OPCODE",
792 "CP encountered STOP OPCODE",
793 "CP AXI LBW error",
794 "CP WRREG32 or WRBULK returned error",
795 "N/A",
796 "FENCE 0 inc over max value and clipped",
797 "FENCE 1 inc over max value and clipped",
798 "FENCE 2 inc over max value and clipped",
799 "FENCE 3 inc over max value and clipped",
800 "FENCE 0 dec under min value and clipped",
801 "FENCE 1 dec under min value and clipped",
802 "FENCE 2 dec under min value and clipped",
803 "FENCE 3 dec under min value and clipped",
804 "CPDMA Up overflow",
805 "PQC L2H error"
806 };
807
808 static const char * const gaudi2_lower_qman_error_cause[GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE] = {
809 "RSVD0",
810 "CQ AXI HBW error",
811 "CP AXI HBW error",
812 "CP error due to undefined OPCODE",
813 "CP encountered STOP OPCODE",
814 "CP AXI LBW error",
815 "CP WRREG32 or WRBULK returned error",
816 "N/A",
817 "FENCE 0 inc over max value and clipped",
818 "FENCE 1 inc over max value and clipped",
819 "FENCE 2 inc over max value and clipped",
820 "FENCE 3 inc over max value and clipped",
821 "FENCE 0 dec under min value and clipped",
822 "FENCE 1 dec under min value and clipped",
823 "FENCE 2 dec under min value and clipped",
824 "FENCE 3 dec under min value and clipped",
825 "CPDMA Up overflow",
826 "RSVD17",
827 "CQ_WR_IFIFO_CI_ERR",
828 "CQ_WR_CTL_CI_ERR",
829 "ARC_CQF_RD_ERR",
830 "ARC_CQ_WR_IFIFO_CI_ERR",
831 "ARC_CQ_WR_CTL_CI_ERR",
832 "ARC_AXI_ERR",
833 "CP_SWITCH_WDT_ERR"
834 };
835
836 static const char * const gaudi2_qman_arb_error_cause[GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE] = {
837 "Choice push while full error",
838 "Choice Q watchdog error",
839 "MSG AXI LBW returned with error"
840 };
841
842 static const char * const guadi2_rot_error_cause[GAUDI2_NUM_OF_ROT_ERR_CAUSE] = {
843 "qm_axi_err",
844 "qm_trace_fence_events",
845 "qm_sw_err",
846 "qm_cp_sw_stop",
847 "lbw_mstr_rresp_err",
848 "lbw_mstr_bresp_err",
849 "lbw_msg_slverr",
850 "hbw_msg_slverr",
851 "wbc_slverr",
852 "hbw_mstr_rresp_err",
853 "hbw_mstr_bresp_err",
854 "sb_resp_intr",
855 "mrsb_resp_intr",
856 "core_dw_status_0",
857 "core_dw_status_1",
858 "core_dw_status_2",
859 "core_dw_status_3",
860 "core_dw_status_4",
861 "core_dw_status_5",
862 "core_dw_status_6",
863 "core_dw_status_7",
864 "async_arc2cpu_sei_intr",
865 };
866
867 static const char * const gaudi2_tpc_interrupts_cause[GAUDI2_NUM_OF_TPC_INTR_CAUSE] = {
868 "tpc_address_exceed_slm",
869 "tpc_div_by_0",
870 "tpc_spu_mac_overflow",
871 "tpc_spu_addsub_overflow",
872 "tpc_spu_abs_overflow",
873 "tpc_spu_fma_fp_dst_nan",
874 "tpc_spu_fma_fp_dst_inf",
875 "tpc_spu_convert_fp_dst_nan",
876 "tpc_spu_convert_fp_dst_inf",
877 "tpc_spu_fp_dst_denorm",
878 "tpc_vpu_mac_overflow",
879 "tpc_vpu_addsub_overflow",
880 "tpc_vpu_abs_overflow",
881 "tpc_vpu_convert_fp_dst_nan",
882 "tpc_vpu_convert_fp_dst_inf",
883 "tpc_vpu_fma_fp_dst_nan",
884 "tpc_vpu_fma_fp_dst_inf",
885 "tpc_vpu_fp_dst_denorm",
886 "tpc_assertions",
887 "tpc_illegal_instruction",
888 "tpc_pc_wrap_around",
889 "tpc_qm_sw_err",
890 "tpc_hbw_rresp_err",
891 "tpc_hbw_bresp_err",
892 "tpc_lbw_rresp_err",
893 "tpc_lbw_bresp_err",
894 "st_unlock_already_locked",
895 "invalid_lock_access",
896 "LD_L protection violation",
897 "ST_L protection violation",
898 "D$ L0CS mismatch",
899 };
900
901 static const char * const guadi2_mme_error_cause[GAUDI2_NUM_OF_MME_ERR_CAUSE] = {
902 "agu_resp_intr",
903 "qman_axi_err",
904 "wap sei (wbc axi err)",
905 "arc sei",
906 "cfg access error",
907 "qm_sw_err",
908 "sbte_dbg_intr_0",
909 "sbte_dbg_intr_1",
910 "sbte_dbg_intr_2",
911 "sbte_dbg_intr_3",
912 "sbte_dbg_intr_4",
913 "sbte_prtn_intr_0",
914 "sbte_prtn_intr_1",
915 "sbte_prtn_intr_2",
916 "sbte_prtn_intr_3",
917 "sbte_prtn_intr_4",
918 };
919
920 static const char * const guadi2_mme_wap_error_cause[GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE] = {
921 "WBC ERR RESP_0",
922 "WBC ERR RESP_1",
923 "AP SOURCE POS INF",
924 "AP SOURCE NEG INF",
925 "AP SOURCE NAN",
926 "AP RESULT POS INF",
927 "AP RESULT NEG INF",
928 };
929
930 static const char * const gaudi2_dma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
931 "HBW Read returned with error RRESP",
932 "HBW write returned with error BRESP",
933 "LBW write returned with error BRESP",
934 "descriptor_fifo_overflow",
935 "KDMA SB LBW Read returned with error",
936 "KDMA WBC LBW Write returned with error",
937 "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
938 "WRONG CFG FOR COMMIT IN LIN DMA"
939 };
940
941 static const char * const gaudi2_kdma_core_interrupts_cause[GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE] = {
942 "HBW/LBW Read returned with error RRESP",
943 "HBW/LBW write returned with error BRESP",
944 "LBW write returned with error BRESP",
945 "descriptor_fifo_overflow",
946 "KDMA SB LBW Read returned with error",
947 "KDMA WBC LBW Write returned with error",
948 "TRANSPOSE ENGINE DESC FIFO OVERFLOW",
949 "WRONG CFG FOR COMMIT IN LIN DMA"
950 };
951
952 struct gaudi2_sm_sei_cause_data {
953 const char *cause_name;
954 const char *log_name;
955 };
956
957 static const struct gaudi2_sm_sei_cause_data
958 gaudi2_sm_sei_cause[GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE] = {
959 {"calculated SO value overflow/underflow", "SOB ID"},
960 {"payload address of monitor is not aligned to 4B", "monitor addr"},
961 {"armed monitor write got BRESP (SLVERR or DECERR)", "AXI id"},
962 };
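
/*
 * Reporting sketch (illustration only): each SM SEI cause pairs a description with
 * the name of the value that accompanies it in the log, so a handler could emit
 * something along the lines of:
 *
 *	dev_err(hdev->dev, "SM SEI error: %s (%s: 0x%x)\n",
 *		gaudi2_sm_sei_cause[idx].cause_name,
 *		gaudi2_sm_sei_cause[idx].log_name, log_value);
 */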
963
964 static const char * const
965 gaudi2_pmmu_fatal_interrupts_cause[GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE] = {
966 "LATENCY_RD_OUT_FIFO_OVERRUN",
967 "LATENCY_WR_OUT_FIFO_OVERRUN",
968 };
969
970 static const char * const
971 gaudi2_hif_fatal_interrupts_cause[GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE] = {
972 "LATENCY_RD_OUT_FIFO_OVERRUN",
973 "LATENCY_WR_OUT_FIFO_OVERRUN",
974 };
975
976 static const char * const
977 gaudi2_psoc_axi_drain_interrupts_cause[GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE] = {
978 "AXI drain HBW",
979 "AXI drain LBW",
980 };
981
982 static const char * const
983 gaudi2_pcie_addr_dec_error_cause[GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE] = {
984 "HBW error response",
985 "LBW error response",
986 "TLP is blocked by RR"
987 };
988
989 static const int gaudi2_queue_id_to_engine_id[] = {
990 [GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_0_3] = GAUDI2_ENGINE_ID_PDMA_0,
991 [GAUDI2_QUEUE_ID_PDMA_1_0...GAUDI2_QUEUE_ID_PDMA_1_3] = GAUDI2_ENGINE_ID_PDMA_1,
992 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] =
993 GAUDI2_DCORE0_ENGINE_ID_EDMA_0,
994 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] =
995 GAUDI2_DCORE0_ENGINE_ID_EDMA_1,
996 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] =
997 GAUDI2_DCORE1_ENGINE_ID_EDMA_0,
998 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] =
999 GAUDI2_DCORE1_ENGINE_ID_EDMA_1,
1000 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] =
1001 GAUDI2_DCORE2_ENGINE_ID_EDMA_0,
1002 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] =
1003 GAUDI2_DCORE2_ENGINE_ID_EDMA_1,
1004 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] =
1005 GAUDI2_DCORE3_ENGINE_ID_EDMA_0,
1006 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] =
1007 GAUDI2_DCORE3_ENGINE_ID_EDMA_1,
1008 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3] =
1009 GAUDI2_DCORE0_ENGINE_ID_MME,
1010 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3] =
1011 GAUDI2_DCORE1_ENGINE_ID_MME,
1012 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3] =
1013 GAUDI2_DCORE2_ENGINE_ID_MME,
1014 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3] =
1015 GAUDI2_DCORE3_ENGINE_ID_MME,
1016 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0...GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] =
1017 GAUDI2_DCORE0_ENGINE_ID_TPC_0,
1018 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0...GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] =
1019 GAUDI2_DCORE0_ENGINE_ID_TPC_1,
1020 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0...GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] =
1021 GAUDI2_DCORE0_ENGINE_ID_TPC_2,
1022 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0...GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] =
1023 GAUDI2_DCORE0_ENGINE_ID_TPC_3,
1024 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0...GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] =
1025 GAUDI2_DCORE0_ENGINE_ID_TPC_4,
1026 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0...GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] =
1027 GAUDI2_DCORE0_ENGINE_ID_TPC_5,
1028 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0...GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] =
1029 GAUDI2_DCORE0_ENGINE_ID_TPC_6,
1030 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0...GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] =
1031 GAUDI2_DCORE1_ENGINE_ID_TPC_0,
1032 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0...GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] =
1033 GAUDI2_DCORE1_ENGINE_ID_TPC_1,
1034 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0...GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] =
1035 GAUDI2_DCORE1_ENGINE_ID_TPC_2,
1036 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0...GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] =
1037 GAUDI2_DCORE1_ENGINE_ID_TPC_3,
1038 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0...GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] =
1039 GAUDI2_DCORE1_ENGINE_ID_TPC_4,
1040 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0...GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] =
1041 GAUDI2_DCORE1_ENGINE_ID_TPC_5,
1042 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0...GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] =
1043 GAUDI2_DCORE2_ENGINE_ID_TPC_0,
1044 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0...GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] =
1045 GAUDI2_DCORE2_ENGINE_ID_TPC_1,
1046 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0...GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] =
1047 GAUDI2_DCORE2_ENGINE_ID_TPC_2,
1048 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0...GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] =
1049 GAUDI2_DCORE2_ENGINE_ID_TPC_3,
1050 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0...GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] =
1051 GAUDI2_DCORE2_ENGINE_ID_TPC_4,
1052 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0...GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] =
1053 GAUDI2_DCORE2_ENGINE_ID_TPC_5,
1054 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0...GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] =
1055 GAUDI2_DCORE3_ENGINE_ID_TPC_0,
1056 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0...GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] =
1057 GAUDI2_DCORE3_ENGINE_ID_TPC_1,
1058 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0...GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] =
1059 GAUDI2_DCORE3_ENGINE_ID_TPC_2,
1060 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0...GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] =
1061 GAUDI2_DCORE3_ENGINE_ID_TPC_3,
1062 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0...GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] =
1063 GAUDI2_DCORE3_ENGINE_ID_TPC_4,
1064 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0...GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] =
1065 GAUDI2_DCORE3_ENGINE_ID_TPC_5,
1066 [GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_0_3] = GAUDI2_ENGINE_ID_NIC0_0,
1067 [GAUDI2_QUEUE_ID_NIC_1_0...GAUDI2_QUEUE_ID_NIC_1_3] = GAUDI2_ENGINE_ID_NIC0_1,
1068 [GAUDI2_QUEUE_ID_NIC_2_0...GAUDI2_QUEUE_ID_NIC_2_3] = GAUDI2_ENGINE_ID_NIC1_0,
1069 [GAUDI2_QUEUE_ID_NIC_3_0...GAUDI2_QUEUE_ID_NIC_3_3] = GAUDI2_ENGINE_ID_NIC1_1,
1070 [GAUDI2_QUEUE_ID_NIC_4_0...GAUDI2_QUEUE_ID_NIC_4_3] = GAUDI2_ENGINE_ID_NIC2_0,
1071 [GAUDI2_QUEUE_ID_NIC_5_0...GAUDI2_QUEUE_ID_NIC_5_3] = GAUDI2_ENGINE_ID_NIC2_1,
1072 [GAUDI2_QUEUE_ID_NIC_6_0...GAUDI2_QUEUE_ID_NIC_6_3] = GAUDI2_ENGINE_ID_NIC3_0,
1073 [GAUDI2_QUEUE_ID_NIC_7_0...GAUDI2_QUEUE_ID_NIC_7_3] = GAUDI2_ENGINE_ID_NIC3_1,
1074 [GAUDI2_QUEUE_ID_NIC_8_0...GAUDI2_QUEUE_ID_NIC_8_3] = GAUDI2_ENGINE_ID_NIC4_0,
1075 [GAUDI2_QUEUE_ID_NIC_9_0...GAUDI2_QUEUE_ID_NIC_9_3] = GAUDI2_ENGINE_ID_NIC4_1,
1076 [GAUDI2_QUEUE_ID_NIC_10_0...GAUDI2_QUEUE_ID_NIC_10_3] = GAUDI2_ENGINE_ID_NIC5_0,
1077 [GAUDI2_QUEUE_ID_NIC_11_0...GAUDI2_QUEUE_ID_NIC_11_3] = GAUDI2_ENGINE_ID_NIC5_1,
1078 [GAUDI2_QUEUE_ID_NIC_12_0...GAUDI2_QUEUE_ID_NIC_12_3] = GAUDI2_ENGINE_ID_NIC6_0,
1079 [GAUDI2_QUEUE_ID_NIC_13_0...GAUDI2_QUEUE_ID_NIC_13_3] = GAUDI2_ENGINE_ID_NIC6_1,
1080 [GAUDI2_QUEUE_ID_NIC_14_0...GAUDI2_QUEUE_ID_NIC_14_3] = GAUDI2_ENGINE_ID_NIC7_0,
1081 [GAUDI2_QUEUE_ID_NIC_15_0...GAUDI2_QUEUE_ID_NIC_15_3] = GAUDI2_ENGINE_ID_NIC7_1,
1082 [GAUDI2_QUEUE_ID_NIC_16_0...GAUDI2_QUEUE_ID_NIC_16_3] = GAUDI2_ENGINE_ID_NIC8_0,
1083 [GAUDI2_QUEUE_ID_NIC_17_0...GAUDI2_QUEUE_ID_NIC_17_3] = GAUDI2_ENGINE_ID_NIC8_1,
1084 [GAUDI2_QUEUE_ID_NIC_18_0...GAUDI2_QUEUE_ID_NIC_18_3] = GAUDI2_ENGINE_ID_NIC9_0,
1085 [GAUDI2_QUEUE_ID_NIC_19_0...GAUDI2_QUEUE_ID_NIC_19_3] = GAUDI2_ENGINE_ID_NIC9_1,
1086 [GAUDI2_QUEUE_ID_NIC_20_0...GAUDI2_QUEUE_ID_NIC_20_3] = GAUDI2_ENGINE_ID_NIC10_0,
1087 [GAUDI2_QUEUE_ID_NIC_21_0...GAUDI2_QUEUE_ID_NIC_21_3] = GAUDI2_ENGINE_ID_NIC10_1,
1088 [GAUDI2_QUEUE_ID_NIC_22_0...GAUDI2_QUEUE_ID_NIC_22_3] = GAUDI2_ENGINE_ID_NIC11_0,
1089 [GAUDI2_QUEUE_ID_NIC_23_0...GAUDI2_QUEUE_ID_NIC_23_3] = GAUDI2_ENGINE_ID_NIC11_1,
1090 [GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_0_3] = GAUDI2_ENGINE_ID_ROT_0,
1091 [GAUDI2_QUEUE_ID_ROT_1_0...GAUDI2_QUEUE_ID_ROT_1_3] = GAUDI2_ENGINE_ID_ROT_1,
1092 };
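
/*
 * Lookup sketch (hypothetical helper): translating a queue ID to the engine that
 * owns it via the range-initialized table above. The helper name and the
 * GAUDI2_ENGINE_ID_SIZE fallback are assumptions made for this example.
 */
static inline u32 gaudi2_example_queue_to_engine_id(u32 queue_id)
{
	if (queue_id >= ARRAY_SIZE(gaudi2_queue_id_to_engine_id))
		return GAUDI2_ENGINE_ID_SIZE;

	return gaudi2_queue_id_to_engine_id[queue_id];
}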
1093
1094 const u32 gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_SIZE] = {
1095 [GAUDI2_QUEUE_ID_PDMA_0_0] = mmPDMA0_QM_BASE,
1096 [GAUDI2_QUEUE_ID_PDMA_0_1] = mmPDMA0_QM_BASE,
1097 [GAUDI2_QUEUE_ID_PDMA_0_2] = mmPDMA0_QM_BASE,
1098 [GAUDI2_QUEUE_ID_PDMA_0_3] = mmPDMA0_QM_BASE,
1099 [GAUDI2_QUEUE_ID_PDMA_1_0] = mmPDMA1_QM_BASE,
1100 [GAUDI2_QUEUE_ID_PDMA_1_1] = mmPDMA1_QM_BASE,
1101 [GAUDI2_QUEUE_ID_PDMA_1_2] = mmPDMA1_QM_BASE,
1102 [GAUDI2_QUEUE_ID_PDMA_1_3] = mmPDMA1_QM_BASE,
1103 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = mmDCORE0_EDMA0_QM_BASE,
1104 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = mmDCORE0_EDMA0_QM_BASE,
1105 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = mmDCORE0_EDMA0_QM_BASE,
1106 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = mmDCORE0_EDMA0_QM_BASE,
1107 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = mmDCORE0_EDMA1_QM_BASE,
1108 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = mmDCORE0_EDMA1_QM_BASE,
1109 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = mmDCORE0_EDMA1_QM_BASE,
1110 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = mmDCORE0_EDMA1_QM_BASE,
1111 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = mmDCORE0_MME_QM_BASE,
1112 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = mmDCORE0_MME_QM_BASE,
1113 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = mmDCORE0_MME_QM_BASE,
1114 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = mmDCORE0_MME_QM_BASE,
1115 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = mmDCORE0_TPC0_QM_BASE,
1116 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = mmDCORE0_TPC0_QM_BASE,
1117 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = mmDCORE0_TPC0_QM_BASE,
1118 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = mmDCORE0_TPC0_QM_BASE,
1119 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = mmDCORE0_TPC1_QM_BASE,
1120 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = mmDCORE0_TPC1_QM_BASE,
1121 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = mmDCORE0_TPC1_QM_BASE,
1122 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = mmDCORE0_TPC1_QM_BASE,
1123 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = mmDCORE0_TPC2_QM_BASE,
1124 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = mmDCORE0_TPC2_QM_BASE,
1125 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = mmDCORE0_TPC2_QM_BASE,
1126 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = mmDCORE0_TPC2_QM_BASE,
1127 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = mmDCORE0_TPC3_QM_BASE,
1128 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = mmDCORE0_TPC3_QM_BASE,
1129 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = mmDCORE0_TPC3_QM_BASE,
1130 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = mmDCORE0_TPC3_QM_BASE,
1131 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = mmDCORE0_TPC4_QM_BASE,
1132 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = mmDCORE0_TPC4_QM_BASE,
1133 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = mmDCORE0_TPC4_QM_BASE,
1134 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = mmDCORE0_TPC4_QM_BASE,
1135 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = mmDCORE0_TPC5_QM_BASE,
1136 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = mmDCORE0_TPC5_QM_BASE,
1137 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = mmDCORE0_TPC5_QM_BASE,
1138 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = mmDCORE0_TPC5_QM_BASE,
1139 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = mmDCORE0_TPC6_QM_BASE,
1140 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = mmDCORE0_TPC6_QM_BASE,
1141 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = mmDCORE0_TPC6_QM_BASE,
1142 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = mmDCORE0_TPC6_QM_BASE,
1143 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = mmDCORE1_EDMA0_QM_BASE,
1144 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = mmDCORE1_EDMA0_QM_BASE,
1145 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = mmDCORE1_EDMA0_QM_BASE,
1146 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = mmDCORE1_EDMA0_QM_BASE,
1147 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = mmDCORE1_EDMA1_QM_BASE,
1148 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = mmDCORE1_EDMA1_QM_BASE,
1149 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = mmDCORE1_EDMA1_QM_BASE,
1150 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = mmDCORE1_EDMA1_QM_BASE,
1151 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = mmDCORE1_MME_QM_BASE,
1152 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = mmDCORE1_MME_QM_BASE,
1153 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = mmDCORE1_MME_QM_BASE,
1154 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = mmDCORE1_MME_QM_BASE,
1155 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = mmDCORE1_TPC0_QM_BASE,
1156 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = mmDCORE1_TPC0_QM_BASE,
1157 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = mmDCORE1_TPC0_QM_BASE,
1158 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = mmDCORE1_TPC0_QM_BASE,
1159 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = mmDCORE1_TPC1_QM_BASE,
1160 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = mmDCORE1_TPC1_QM_BASE,
1161 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = mmDCORE1_TPC1_QM_BASE,
1162 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = mmDCORE1_TPC1_QM_BASE,
1163 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = mmDCORE1_TPC2_QM_BASE,
1164 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = mmDCORE1_TPC2_QM_BASE,
1165 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = mmDCORE1_TPC2_QM_BASE,
1166 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = mmDCORE1_TPC2_QM_BASE,
1167 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = mmDCORE1_TPC3_QM_BASE,
1168 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = mmDCORE1_TPC3_QM_BASE,
1169 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = mmDCORE1_TPC3_QM_BASE,
1170 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = mmDCORE1_TPC3_QM_BASE,
1171 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = mmDCORE1_TPC4_QM_BASE,
1172 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = mmDCORE1_TPC4_QM_BASE,
1173 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = mmDCORE1_TPC4_QM_BASE,
1174 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = mmDCORE1_TPC4_QM_BASE,
1175 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = mmDCORE1_TPC5_QM_BASE,
1176 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = mmDCORE1_TPC5_QM_BASE,
1177 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = mmDCORE1_TPC5_QM_BASE,
1178 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = mmDCORE1_TPC5_QM_BASE,
1179 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = mmDCORE2_EDMA0_QM_BASE,
1180 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = mmDCORE2_EDMA0_QM_BASE,
1181 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = mmDCORE2_EDMA0_QM_BASE,
1182 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = mmDCORE2_EDMA0_QM_BASE,
1183 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = mmDCORE2_EDMA1_QM_BASE,
1184 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = mmDCORE2_EDMA1_QM_BASE,
1185 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = mmDCORE2_EDMA1_QM_BASE,
1186 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = mmDCORE2_EDMA1_QM_BASE,
1187 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = mmDCORE2_MME_QM_BASE,
1188 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = mmDCORE2_MME_QM_BASE,
1189 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = mmDCORE2_MME_QM_BASE,
1190 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = mmDCORE2_MME_QM_BASE,
1191 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = mmDCORE2_TPC0_QM_BASE,
1192 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = mmDCORE2_TPC0_QM_BASE,
1193 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = mmDCORE2_TPC0_QM_BASE,
1194 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = mmDCORE2_TPC0_QM_BASE,
1195 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = mmDCORE2_TPC1_QM_BASE,
1196 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = mmDCORE2_TPC1_QM_BASE,
1197 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = mmDCORE2_TPC1_QM_BASE,
1198 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = mmDCORE2_TPC1_QM_BASE,
1199 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = mmDCORE2_TPC2_QM_BASE,
1200 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = mmDCORE2_TPC2_QM_BASE,
1201 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = mmDCORE2_TPC2_QM_BASE,
1202 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = mmDCORE2_TPC2_QM_BASE,
1203 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = mmDCORE2_TPC3_QM_BASE,
1204 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = mmDCORE2_TPC3_QM_BASE,
1205 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = mmDCORE2_TPC3_QM_BASE,
1206 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = mmDCORE2_TPC3_QM_BASE,
1207 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = mmDCORE2_TPC4_QM_BASE,
1208 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = mmDCORE2_TPC4_QM_BASE,
1209 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = mmDCORE2_TPC4_QM_BASE,
1210 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = mmDCORE2_TPC4_QM_BASE,
1211 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = mmDCORE2_TPC5_QM_BASE,
1212 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = mmDCORE2_TPC5_QM_BASE,
1213 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = mmDCORE2_TPC5_QM_BASE,
1214 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = mmDCORE2_TPC5_QM_BASE,
1215 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = mmDCORE3_EDMA0_QM_BASE,
1216 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = mmDCORE3_EDMA0_QM_BASE,
1217 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = mmDCORE3_EDMA0_QM_BASE,
1218 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = mmDCORE3_EDMA0_QM_BASE,
1219 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = mmDCORE3_EDMA1_QM_BASE,
1220 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = mmDCORE3_EDMA1_QM_BASE,
1221 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = mmDCORE3_EDMA1_QM_BASE,
1222 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = mmDCORE3_EDMA1_QM_BASE,
1223 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = mmDCORE3_MME_QM_BASE,
1224 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = mmDCORE3_MME_QM_BASE,
1225 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = mmDCORE3_MME_QM_BASE,
1226 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = mmDCORE3_MME_QM_BASE,
1227 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = mmDCORE3_TPC0_QM_BASE,
1228 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = mmDCORE3_TPC0_QM_BASE,
1229 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = mmDCORE3_TPC0_QM_BASE,
1230 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = mmDCORE3_TPC0_QM_BASE,
1231 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = mmDCORE3_TPC1_QM_BASE,
1232 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = mmDCORE3_TPC1_QM_BASE,
1233 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = mmDCORE3_TPC1_QM_BASE,
1234 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = mmDCORE3_TPC1_QM_BASE,
1235 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = mmDCORE3_TPC2_QM_BASE,
1236 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = mmDCORE3_TPC2_QM_BASE,
1237 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = mmDCORE3_TPC2_QM_BASE,
1238 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = mmDCORE3_TPC2_QM_BASE,
1239 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = mmDCORE3_TPC3_QM_BASE,
1240 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = mmDCORE3_TPC3_QM_BASE,
1241 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = mmDCORE3_TPC3_QM_BASE,
1242 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = mmDCORE3_TPC3_QM_BASE,
1243 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = mmDCORE3_TPC4_QM_BASE,
1244 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = mmDCORE3_TPC4_QM_BASE,
1245 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = mmDCORE3_TPC4_QM_BASE,
1246 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = mmDCORE3_TPC4_QM_BASE,
1247 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = mmDCORE3_TPC5_QM_BASE,
1248 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = mmDCORE3_TPC5_QM_BASE,
1249 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = mmDCORE3_TPC5_QM_BASE,
1250 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = mmDCORE3_TPC5_QM_BASE,
1251 [GAUDI2_QUEUE_ID_NIC_0_0] = mmNIC0_QM0_BASE,
1252 [GAUDI2_QUEUE_ID_NIC_0_1] = mmNIC0_QM0_BASE,
1253 [GAUDI2_QUEUE_ID_NIC_0_2] = mmNIC0_QM0_BASE,
1254 [GAUDI2_QUEUE_ID_NIC_0_3] = mmNIC0_QM0_BASE,
1255 [GAUDI2_QUEUE_ID_NIC_1_0] = mmNIC0_QM1_BASE,
1256 [GAUDI2_QUEUE_ID_NIC_1_1] = mmNIC0_QM1_BASE,
1257 [GAUDI2_QUEUE_ID_NIC_1_2] = mmNIC0_QM1_BASE,
1258 [GAUDI2_QUEUE_ID_NIC_1_3] = mmNIC0_QM1_BASE,
1259 [GAUDI2_QUEUE_ID_NIC_2_0] = mmNIC1_QM0_BASE,
1260 [GAUDI2_QUEUE_ID_NIC_2_1] = mmNIC1_QM0_BASE,
1261 [GAUDI2_QUEUE_ID_NIC_2_2] = mmNIC1_QM0_BASE,
1262 [GAUDI2_QUEUE_ID_NIC_2_3] = mmNIC1_QM0_BASE,
1263 [GAUDI2_QUEUE_ID_NIC_3_0] = mmNIC1_QM1_BASE,
1264 [GAUDI2_QUEUE_ID_NIC_3_1] = mmNIC1_QM1_BASE,
1265 [GAUDI2_QUEUE_ID_NIC_3_2] = mmNIC1_QM1_BASE,
1266 [GAUDI2_QUEUE_ID_NIC_3_3] = mmNIC1_QM1_BASE,
1267 [GAUDI2_QUEUE_ID_NIC_4_0] = mmNIC2_QM0_BASE,
1268 [GAUDI2_QUEUE_ID_NIC_4_1] = mmNIC2_QM0_BASE,
1269 [GAUDI2_QUEUE_ID_NIC_4_2] = mmNIC2_QM0_BASE,
1270 [GAUDI2_QUEUE_ID_NIC_4_3] = mmNIC2_QM0_BASE,
1271 [GAUDI2_QUEUE_ID_NIC_5_0] = mmNIC2_QM1_BASE,
1272 [GAUDI2_QUEUE_ID_NIC_5_1] = mmNIC2_QM1_BASE,
1273 [GAUDI2_QUEUE_ID_NIC_5_2] = mmNIC2_QM1_BASE,
1274 [GAUDI2_QUEUE_ID_NIC_5_3] = mmNIC2_QM1_BASE,
1275 [GAUDI2_QUEUE_ID_NIC_6_0] = mmNIC3_QM0_BASE,
1276 [GAUDI2_QUEUE_ID_NIC_6_1] = mmNIC3_QM0_BASE,
1277 [GAUDI2_QUEUE_ID_NIC_6_2] = mmNIC3_QM0_BASE,
1278 [GAUDI2_QUEUE_ID_NIC_6_3] = mmNIC3_QM0_BASE,
1279 [GAUDI2_QUEUE_ID_NIC_7_0] = mmNIC3_QM1_BASE,
1280 [GAUDI2_QUEUE_ID_NIC_7_1] = mmNIC3_QM1_BASE,
1281 [GAUDI2_QUEUE_ID_NIC_7_2] = mmNIC3_QM1_BASE,
1282 [GAUDI2_QUEUE_ID_NIC_7_3] = mmNIC3_QM1_BASE,
1283 [GAUDI2_QUEUE_ID_NIC_8_0] = mmNIC4_QM0_BASE,
1284 [GAUDI2_QUEUE_ID_NIC_8_1] = mmNIC4_QM0_BASE,
1285 [GAUDI2_QUEUE_ID_NIC_8_2] = mmNIC4_QM0_BASE,
1286 [GAUDI2_QUEUE_ID_NIC_8_3] = mmNIC4_QM0_BASE,
1287 [GAUDI2_QUEUE_ID_NIC_9_0] = mmNIC4_QM1_BASE,
1288 [GAUDI2_QUEUE_ID_NIC_9_1] = mmNIC4_QM1_BASE,
1289 [GAUDI2_QUEUE_ID_NIC_9_2] = mmNIC4_QM1_BASE,
1290 [GAUDI2_QUEUE_ID_NIC_9_3] = mmNIC4_QM1_BASE,
1291 [GAUDI2_QUEUE_ID_NIC_10_0] = mmNIC5_QM0_BASE,
1292 [GAUDI2_QUEUE_ID_NIC_10_1] = mmNIC5_QM0_BASE,
1293 [GAUDI2_QUEUE_ID_NIC_10_2] = mmNIC5_QM0_BASE,
1294 [GAUDI2_QUEUE_ID_NIC_10_3] = mmNIC5_QM0_BASE,
1295 [GAUDI2_QUEUE_ID_NIC_11_0] = mmNIC5_QM1_BASE,
1296 [GAUDI2_QUEUE_ID_NIC_11_1] = mmNIC5_QM1_BASE,
1297 [GAUDI2_QUEUE_ID_NIC_11_2] = mmNIC5_QM1_BASE,
1298 [GAUDI2_QUEUE_ID_NIC_11_3] = mmNIC5_QM1_BASE,
1299 [GAUDI2_QUEUE_ID_NIC_12_0] = mmNIC6_QM0_BASE,
1300 [GAUDI2_QUEUE_ID_NIC_12_1] = mmNIC6_QM0_BASE,
1301 [GAUDI2_QUEUE_ID_NIC_12_2] = mmNIC6_QM0_BASE,
1302 [GAUDI2_QUEUE_ID_NIC_12_3] = mmNIC6_QM0_BASE,
1303 [GAUDI2_QUEUE_ID_NIC_13_0] = mmNIC6_QM1_BASE,
1304 [GAUDI2_QUEUE_ID_NIC_13_1] = mmNIC6_QM1_BASE,
1305 [GAUDI2_QUEUE_ID_NIC_13_2] = mmNIC6_QM1_BASE,
1306 [GAUDI2_QUEUE_ID_NIC_13_3] = mmNIC6_QM1_BASE,
1307 [GAUDI2_QUEUE_ID_NIC_14_0] = mmNIC7_QM0_BASE,
1308 [GAUDI2_QUEUE_ID_NIC_14_1] = mmNIC7_QM0_BASE,
1309 [GAUDI2_QUEUE_ID_NIC_14_2] = mmNIC7_QM0_BASE,
1310 [GAUDI2_QUEUE_ID_NIC_14_3] = mmNIC7_QM0_BASE,
1311 [GAUDI2_QUEUE_ID_NIC_15_0] = mmNIC7_QM1_BASE,
1312 [GAUDI2_QUEUE_ID_NIC_15_1] = mmNIC7_QM1_BASE,
1313 [GAUDI2_QUEUE_ID_NIC_15_2] = mmNIC7_QM1_BASE,
1314 [GAUDI2_QUEUE_ID_NIC_15_3] = mmNIC7_QM1_BASE,
1315 [GAUDI2_QUEUE_ID_NIC_16_0] = mmNIC8_QM0_BASE,
1316 [GAUDI2_QUEUE_ID_NIC_16_1] = mmNIC8_QM0_BASE,
1317 [GAUDI2_QUEUE_ID_NIC_16_2] = mmNIC8_QM0_BASE,
1318 [GAUDI2_QUEUE_ID_NIC_16_3] = mmNIC8_QM0_BASE,
1319 [GAUDI2_QUEUE_ID_NIC_17_0] = mmNIC8_QM1_BASE,
1320 [GAUDI2_QUEUE_ID_NIC_17_1] = mmNIC8_QM1_BASE,
1321 [GAUDI2_QUEUE_ID_NIC_17_2] = mmNIC8_QM1_BASE,
1322 [GAUDI2_QUEUE_ID_NIC_17_3] = mmNIC8_QM1_BASE,
1323 [GAUDI2_QUEUE_ID_NIC_18_0] = mmNIC9_QM0_BASE,
1324 [GAUDI2_QUEUE_ID_NIC_18_1] = mmNIC9_QM0_BASE,
1325 [GAUDI2_QUEUE_ID_NIC_18_2] = mmNIC9_QM0_BASE,
1326 [GAUDI2_QUEUE_ID_NIC_18_3] = mmNIC9_QM0_BASE,
1327 [GAUDI2_QUEUE_ID_NIC_19_0] = mmNIC9_QM1_BASE,
1328 [GAUDI2_QUEUE_ID_NIC_19_1] = mmNIC9_QM1_BASE,
1329 [GAUDI2_QUEUE_ID_NIC_19_2] = mmNIC9_QM1_BASE,
1330 [GAUDI2_QUEUE_ID_NIC_19_3] = mmNIC9_QM1_BASE,
1331 [GAUDI2_QUEUE_ID_NIC_20_0] = mmNIC10_QM0_BASE,
1332 [GAUDI2_QUEUE_ID_NIC_20_1] = mmNIC10_QM0_BASE,
1333 [GAUDI2_QUEUE_ID_NIC_20_2] = mmNIC10_QM0_BASE,
1334 [GAUDI2_QUEUE_ID_NIC_20_3] = mmNIC10_QM0_BASE,
1335 [GAUDI2_QUEUE_ID_NIC_21_0] = mmNIC10_QM1_BASE,
1336 [GAUDI2_QUEUE_ID_NIC_21_1] = mmNIC10_QM1_BASE,
1337 [GAUDI2_QUEUE_ID_NIC_21_2] = mmNIC10_QM1_BASE,
1338 [GAUDI2_QUEUE_ID_NIC_21_3] = mmNIC10_QM1_BASE,
1339 [GAUDI2_QUEUE_ID_NIC_22_0] = mmNIC11_QM0_BASE,
1340 [GAUDI2_QUEUE_ID_NIC_22_1] = mmNIC11_QM0_BASE,
1341 [GAUDI2_QUEUE_ID_NIC_22_2] = mmNIC11_QM0_BASE,
1342 [GAUDI2_QUEUE_ID_NIC_22_3] = mmNIC11_QM0_BASE,
1343 [GAUDI2_QUEUE_ID_NIC_23_0] = mmNIC11_QM1_BASE,
1344 [GAUDI2_QUEUE_ID_NIC_23_1] = mmNIC11_QM1_BASE,
1345 [GAUDI2_QUEUE_ID_NIC_23_2] = mmNIC11_QM1_BASE,
1346 [GAUDI2_QUEUE_ID_NIC_23_3] = mmNIC11_QM1_BASE,
1347 [GAUDI2_QUEUE_ID_ROT_0_0] = mmROT0_QM_BASE,
1348 [GAUDI2_QUEUE_ID_ROT_0_1] = mmROT0_QM_BASE,
1349 [GAUDI2_QUEUE_ID_ROT_0_2] = mmROT0_QM_BASE,
1350 [GAUDI2_QUEUE_ID_ROT_0_3] = mmROT0_QM_BASE,
1351 [GAUDI2_QUEUE_ID_ROT_1_0] = mmROT1_QM_BASE,
1352 [GAUDI2_QUEUE_ID_ROT_1_1] = mmROT1_QM_BASE,
1353 [GAUDI2_QUEUE_ID_ROT_1_2] = mmROT1_QM_BASE,
1354 [GAUDI2_QUEUE_ID_ROT_1_3] = mmROT1_QM_BASE
1355 };
1356
1357 static const u32 gaudi2_arc_blocks_bases[NUM_ARC_CPUS] = {
1358 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_AUX_BASE,
1359 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_AUX_BASE,
1360 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_AUX_BASE,
1361 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_AUX_BASE,
1362 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_AUX_BASE,
1363 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_AUX_BASE,
1364 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_ARC_AUX_BASE,
1365 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_ARC_AUX_BASE,
1366 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_ARC_AUX_BASE,
1367 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_ARC_AUX_BASE,
1368 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_ARC_AUX_BASE,
1369 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_ARC_AUX_BASE,
1370 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_ARC_AUX_BASE,
1371 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_ARC_AUX_BASE,
1372 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_ARC_AUX_BASE,
1373 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_ARC_AUX_BASE,
1374 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_ARC_AUX_BASE,
1375 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_ARC_AUX_BASE,
1376 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_ARC_AUX_BASE,
1377 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_ARC_AUX_BASE,
1378 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_ARC_AUX_BASE,
1379 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_ARC_AUX_BASE,
1380 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_ARC_AUX_BASE,
1381 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_ARC_AUX_BASE,
1382 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_ARC_AUX_BASE,
1383 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_ARC_AUX_BASE,
1384 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_ARC_AUX_BASE,
1385 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_ARC_AUX_BASE,
1386 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_ARC_AUX_BASE,
1387 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_ARC_AUX_BASE,
1388 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_ARC_AUX_BASE,
1389 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_AUX_BASE,
1390 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_AUX_BASE,
1391 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_ARC_AUX_BASE,
1392 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_ARC_AUX_BASE,
1393 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_ARC_AUX_BASE,
1394 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_ARC_AUX_BASE,
1395 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_ARC_AUX_BASE,
1396 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_ARC_AUX_BASE,
1397 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_ARC_AUX_BASE,
1398 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_ARC_AUX_BASE,
1399 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_AUX_BASE,
1400 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_AUX_BASE,
1401 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_AUX_BASE,
1402 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_AUX_BASE,
1403 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_ARC_AUX0_BASE,
1404 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_ARC_AUX1_BASE,
1405 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_ARC_AUX0_BASE,
1406 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_ARC_AUX1_BASE,
1407 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_ARC_AUX0_BASE,
1408 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_ARC_AUX1_BASE,
1409 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_ARC_AUX0_BASE,
1410 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_ARC_AUX1_BASE,
1411 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_ARC_AUX0_BASE,
1412 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_ARC_AUX1_BASE,
1413 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_ARC_AUX0_BASE,
1414 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_ARC_AUX1_BASE,
1415 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_ARC_AUX0_BASE,
1416 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_ARC_AUX1_BASE,
1417 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_ARC_AUX0_BASE,
1418 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_ARC_AUX1_BASE,
1419 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_ARC_AUX0_BASE,
1420 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_ARC_AUX1_BASE,
1421 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_ARC_AUX0_BASE,
1422 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_ARC_AUX1_BASE,
1423 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_ARC_AUX0_BASE,
1424 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_ARC_AUX1_BASE,
1425 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_ARC_AUX0_BASE,
1426 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_ARC_AUX1_BASE,
1427 };
1428
1429 static const u32 gaudi2_arc_dccm_bases[NUM_ARC_CPUS] = {
1430 [CPU_ID_SCHED_ARC0] = mmARC_FARM_ARC0_DCCM0_BASE,
1431 [CPU_ID_SCHED_ARC1] = mmARC_FARM_ARC1_DCCM0_BASE,
1432 [CPU_ID_SCHED_ARC2] = mmARC_FARM_ARC2_DCCM0_BASE,
1433 [CPU_ID_SCHED_ARC3] = mmARC_FARM_ARC3_DCCM0_BASE,
1434 [CPU_ID_SCHED_ARC4] = mmDCORE1_MME_QM_ARC_DCCM_BASE,
1435 [CPU_ID_SCHED_ARC5] = mmDCORE3_MME_QM_ARC_DCCM_BASE,
1436 [CPU_ID_TPC_QMAN_ARC0] = mmDCORE0_TPC0_QM_DCCM_BASE,
1437 [CPU_ID_TPC_QMAN_ARC1] = mmDCORE0_TPC1_QM_DCCM_BASE,
1438 [CPU_ID_TPC_QMAN_ARC2] = mmDCORE0_TPC2_QM_DCCM_BASE,
1439 [CPU_ID_TPC_QMAN_ARC3] = mmDCORE0_TPC3_QM_DCCM_BASE,
1440 [CPU_ID_TPC_QMAN_ARC4] = mmDCORE0_TPC4_QM_DCCM_BASE,
1441 [CPU_ID_TPC_QMAN_ARC5] = mmDCORE0_TPC5_QM_DCCM_BASE,
1442 [CPU_ID_TPC_QMAN_ARC6] = mmDCORE1_TPC0_QM_DCCM_BASE,
1443 [CPU_ID_TPC_QMAN_ARC7] = mmDCORE1_TPC1_QM_DCCM_BASE,
1444 [CPU_ID_TPC_QMAN_ARC8] = mmDCORE1_TPC2_QM_DCCM_BASE,
1445 [CPU_ID_TPC_QMAN_ARC9] = mmDCORE1_TPC3_QM_DCCM_BASE,
1446 [CPU_ID_TPC_QMAN_ARC10] = mmDCORE1_TPC4_QM_DCCM_BASE,
1447 [CPU_ID_TPC_QMAN_ARC11] = mmDCORE1_TPC5_QM_DCCM_BASE,
1448 [CPU_ID_TPC_QMAN_ARC12] = mmDCORE2_TPC0_QM_DCCM_BASE,
1449 [CPU_ID_TPC_QMAN_ARC13] = mmDCORE2_TPC1_QM_DCCM_BASE,
1450 [CPU_ID_TPC_QMAN_ARC14] = mmDCORE2_TPC2_QM_DCCM_BASE,
1451 [CPU_ID_TPC_QMAN_ARC15] = mmDCORE2_TPC3_QM_DCCM_BASE,
1452 [CPU_ID_TPC_QMAN_ARC16] = mmDCORE2_TPC4_QM_DCCM_BASE,
1453 [CPU_ID_TPC_QMAN_ARC17] = mmDCORE2_TPC5_QM_DCCM_BASE,
1454 [CPU_ID_TPC_QMAN_ARC18] = mmDCORE3_TPC0_QM_DCCM_BASE,
1455 [CPU_ID_TPC_QMAN_ARC19] = mmDCORE3_TPC1_QM_DCCM_BASE,
1456 [CPU_ID_TPC_QMAN_ARC20] = mmDCORE3_TPC2_QM_DCCM_BASE,
1457 [CPU_ID_TPC_QMAN_ARC21] = mmDCORE3_TPC3_QM_DCCM_BASE,
1458 [CPU_ID_TPC_QMAN_ARC22] = mmDCORE3_TPC4_QM_DCCM_BASE,
1459 [CPU_ID_TPC_QMAN_ARC23] = mmDCORE3_TPC5_QM_DCCM_BASE,
1460 [CPU_ID_TPC_QMAN_ARC24] = mmDCORE0_TPC6_QM_DCCM_BASE,
1461 [CPU_ID_MME_QMAN_ARC0] = mmDCORE0_MME_QM_ARC_DCCM_BASE,
1462 [CPU_ID_MME_QMAN_ARC1] = mmDCORE2_MME_QM_ARC_DCCM_BASE,
1463 [CPU_ID_EDMA_QMAN_ARC0] = mmDCORE0_EDMA0_QM_DCCM_BASE,
1464 [CPU_ID_EDMA_QMAN_ARC1] = mmDCORE0_EDMA1_QM_DCCM_BASE,
1465 [CPU_ID_EDMA_QMAN_ARC2] = mmDCORE1_EDMA0_QM_DCCM_BASE,
1466 [CPU_ID_EDMA_QMAN_ARC3] = mmDCORE1_EDMA1_QM_DCCM_BASE,
1467 [CPU_ID_EDMA_QMAN_ARC4] = mmDCORE2_EDMA0_QM_DCCM_BASE,
1468 [CPU_ID_EDMA_QMAN_ARC5] = mmDCORE2_EDMA1_QM_DCCM_BASE,
1469 [CPU_ID_EDMA_QMAN_ARC6] = mmDCORE3_EDMA0_QM_DCCM_BASE,
1470 [CPU_ID_EDMA_QMAN_ARC7] = mmDCORE3_EDMA1_QM_DCCM_BASE,
1471 [CPU_ID_PDMA_QMAN_ARC0] = mmPDMA0_QM_ARC_DCCM_BASE,
1472 [CPU_ID_PDMA_QMAN_ARC1] = mmPDMA1_QM_ARC_DCCM_BASE,
1473 [CPU_ID_ROT_QMAN_ARC0] = mmROT0_QM_ARC_DCCM_BASE,
1474 [CPU_ID_ROT_QMAN_ARC1] = mmROT1_QM_ARC_DCCM_BASE,
1475 [CPU_ID_NIC_QMAN_ARC0] = mmNIC0_QM_DCCM0_BASE,
1476 [CPU_ID_NIC_QMAN_ARC1] = mmNIC0_QM_DCCM1_BASE,
1477 [CPU_ID_NIC_QMAN_ARC2] = mmNIC1_QM_DCCM0_BASE,
1478 [CPU_ID_NIC_QMAN_ARC3] = mmNIC1_QM_DCCM1_BASE,
1479 [CPU_ID_NIC_QMAN_ARC4] = mmNIC2_QM_DCCM0_BASE,
1480 [CPU_ID_NIC_QMAN_ARC5] = mmNIC2_QM_DCCM1_BASE,
1481 [CPU_ID_NIC_QMAN_ARC6] = mmNIC3_QM_DCCM0_BASE,
1482 [CPU_ID_NIC_QMAN_ARC7] = mmNIC3_QM_DCCM1_BASE,
1483 [CPU_ID_NIC_QMAN_ARC8] = mmNIC4_QM_DCCM0_BASE,
1484 [CPU_ID_NIC_QMAN_ARC9] = mmNIC4_QM_DCCM1_BASE,
1485 [CPU_ID_NIC_QMAN_ARC10] = mmNIC5_QM_DCCM0_BASE,
1486 [CPU_ID_NIC_QMAN_ARC11] = mmNIC5_QM_DCCM1_BASE,
1487 [CPU_ID_NIC_QMAN_ARC12] = mmNIC6_QM_DCCM0_BASE,
1488 [CPU_ID_NIC_QMAN_ARC13] = mmNIC6_QM_DCCM1_BASE,
1489 [CPU_ID_NIC_QMAN_ARC14] = mmNIC7_QM_DCCM0_BASE,
1490 [CPU_ID_NIC_QMAN_ARC15] = mmNIC7_QM_DCCM1_BASE,
1491 [CPU_ID_NIC_QMAN_ARC16] = mmNIC8_QM_DCCM0_BASE,
1492 [CPU_ID_NIC_QMAN_ARC17] = mmNIC8_QM_DCCM1_BASE,
1493 [CPU_ID_NIC_QMAN_ARC18] = mmNIC9_QM_DCCM0_BASE,
1494 [CPU_ID_NIC_QMAN_ARC19] = mmNIC9_QM_DCCM1_BASE,
1495 [CPU_ID_NIC_QMAN_ARC20] = mmNIC10_QM_DCCM0_BASE,
1496 [CPU_ID_NIC_QMAN_ARC21] = mmNIC10_QM_DCCM1_BASE,
1497 [CPU_ID_NIC_QMAN_ARC22] = mmNIC11_QM_DCCM0_BASE,
1498 [CPU_ID_NIC_QMAN_ARC23] = mmNIC11_QM_DCCM1_BASE,
1499 };
1500
1501 const u32 gaudi2_mme_ctrl_lo_blocks_bases[MME_ID_SIZE] = {
1502 [MME_ID_DCORE0] = mmDCORE0_MME_CTRL_LO_BASE,
1503 [MME_ID_DCORE1] = mmDCORE1_MME_CTRL_LO_BASE,
1504 [MME_ID_DCORE2] = mmDCORE2_MME_CTRL_LO_BASE,
1505 [MME_ID_DCORE3] = mmDCORE3_MME_CTRL_LO_BASE,
1506 };
1507
1508 static const u32 gaudi2_queue_id_to_arc_id[GAUDI2_QUEUE_ID_SIZE] = {
1509 [GAUDI2_QUEUE_ID_PDMA_0_0] = CPU_ID_PDMA_QMAN_ARC0,
1510 [GAUDI2_QUEUE_ID_PDMA_0_1] = CPU_ID_PDMA_QMAN_ARC0,
1511 [GAUDI2_QUEUE_ID_PDMA_0_2] = CPU_ID_PDMA_QMAN_ARC0,
1512 [GAUDI2_QUEUE_ID_PDMA_0_3] = CPU_ID_PDMA_QMAN_ARC0,
1513 [GAUDI2_QUEUE_ID_PDMA_1_0] = CPU_ID_PDMA_QMAN_ARC1,
1514 [GAUDI2_QUEUE_ID_PDMA_1_1] = CPU_ID_PDMA_QMAN_ARC1,
1515 [GAUDI2_QUEUE_ID_PDMA_1_2] = CPU_ID_PDMA_QMAN_ARC1,
1516 [GAUDI2_QUEUE_ID_PDMA_1_3] = CPU_ID_PDMA_QMAN_ARC1,
1517 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC0,
1518 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC0,
1519 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC0,
1520 [GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC0,
1521 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC1,
1522 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC1,
1523 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC1,
1524 [GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC1,
1525 [GAUDI2_QUEUE_ID_DCORE0_MME_0_0] = CPU_ID_MME_QMAN_ARC0,
1526 [GAUDI2_QUEUE_ID_DCORE0_MME_0_1] = CPU_ID_MME_QMAN_ARC0,
1527 [GAUDI2_QUEUE_ID_DCORE0_MME_0_2] = CPU_ID_MME_QMAN_ARC0,
1528 [GAUDI2_QUEUE_ID_DCORE0_MME_0_3] = CPU_ID_MME_QMAN_ARC0,
1529 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_0] = CPU_ID_TPC_QMAN_ARC0,
1530 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_1] = CPU_ID_TPC_QMAN_ARC0,
1531 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_2] = CPU_ID_TPC_QMAN_ARC0,
1532 [GAUDI2_QUEUE_ID_DCORE0_TPC_0_3] = CPU_ID_TPC_QMAN_ARC0,
1533 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_0] = CPU_ID_TPC_QMAN_ARC1,
1534 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_1] = CPU_ID_TPC_QMAN_ARC1,
1535 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_2] = CPU_ID_TPC_QMAN_ARC1,
1536 [GAUDI2_QUEUE_ID_DCORE0_TPC_1_3] = CPU_ID_TPC_QMAN_ARC1,
1537 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_0] = CPU_ID_TPC_QMAN_ARC2,
1538 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_1] = CPU_ID_TPC_QMAN_ARC2,
1539 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_2] = CPU_ID_TPC_QMAN_ARC2,
1540 [GAUDI2_QUEUE_ID_DCORE0_TPC_2_3] = CPU_ID_TPC_QMAN_ARC2,
1541 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_0] = CPU_ID_TPC_QMAN_ARC3,
1542 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_1] = CPU_ID_TPC_QMAN_ARC3,
1543 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_2] = CPU_ID_TPC_QMAN_ARC3,
1544 [GAUDI2_QUEUE_ID_DCORE0_TPC_3_3] = CPU_ID_TPC_QMAN_ARC3,
1545 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_0] = CPU_ID_TPC_QMAN_ARC4,
1546 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_1] = CPU_ID_TPC_QMAN_ARC4,
1547 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_2] = CPU_ID_TPC_QMAN_ARC4,
1548 [GAUDI2_QUEUE_ID_DCORE0_TPC_4_3] = CPU_ID_TPC_QMAN_ARC4,
1549 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_0] = CPU_ID_TPC_QMAN_ARC5,
1550 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_1] = CPU_ID_TPC_QMAN_ARC5,
1551 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_2] = CPU_ID_TPC_QMAN_ARC5,
1552 [GAUDI2_QUEUE_ID_DCORE0_TPC_5_3] = CPU_ID_TPC_QMAN_ARC5,
1553 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_0] = CPU_ID_TPC_QMAN_ARC24,
1554 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_1] = CPU_ID_TPC_QMAN_ARC24,
1555 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_2] = CPU_ID_TPC_QMAN_ARC24,
1556 [GAUDI2_QUEUE_ID_DCORE0_TPC_6_3] = CPU_ID_TPC_QMAN_ARC24,
1557 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC2,
1558 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC2,
1559 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC2,
1560 [GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC2,
1561 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC3,
1562 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC3,
1563 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC3,
1564 [GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC3,
1565 [GAUDI2_QUEUE_ID_DCORE1_MME_0_0] = CPU_ID_SCHED_ARC4,
1566 [GAUDI2_QUEUE_ID_DCORE1_MME_0_1] = CPU_ID_SCHED_ARC4,
1567 [GAUDI2_QUEUE_ID_DCORE1_MME_0_2] = CPU_ID_SCHED_ARC4,
1568 [GAUDI2_QUEUE_ID_DCORE1_MME_0_3] = CPU_ID_SCHED_ARC4,
1569 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_0] = CPU_ID_TPC_QMAN_ARC6,
1570 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_1] = CPU_ID_TPC_QMAN_ARC6,
1571 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_2] = CPU_ID_TPC_QMAN_ARC6,
1572 [GAUDI2_QUEUE_ID_DCORE1_TPC_0_3] = CPU_ID_TPC_QMAN_ARC6,
1573 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_0] = CPU_ID_TPC_QMAN_ARC7,
1574 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_1] = CPU_ID_TPC_QMAN_ARC7,
1575 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_2] = CPU_ID_TPC_QMAN_ARC7,
1576 [GAUDI2_QUEUE_ID_DCORE1_TPC_1_3] = CPU_ID_TPC_QMAN_ARC7,
1577 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_0] = CPU_ID_TPC_QMAN_ARC8,
1578 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_1] = CPU_ID_TPC_QMAN_ARC8,
1579 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_2] = CPU_ID_TPC_QMAN_ARC8,
1580 [GAUDI2_QUEUE_ID_DCORE1_TPC_2_3] = CPU_ID_TPC_QMAN_ARC8,
1581 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_0] = CPU_ID_TPC_QMAN_ARC9,
1582 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_1] = CPU_ID_TPC_QMAN_ARC9,
1583 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_2] = CPU_ID_TPC_QMAN_ARC9,
1584 [GAUDI2_QUEUE_ID_DCORE1_TPC_3_3] = CPU_ID_TPC_QMAN_ARC9,
1585 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_0] = CPU_ID_TPC_QMAN_ARC10,
1586 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_1] = CPU_ID_TPC_QMAN_ARC10,
1587 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_2] = CPU_ID_TPC_QMAN_ARC10,
1588 [GAUDI2_QUEUE_ID_DCORE1_TPC_4_3] = CPU_ID_TPC_QMAN_ARC10,
1589 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_0] = CPU_ID_TPC_QMAN_ARC11,
1590 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_1] = CPU_ID_TPC_QMAN_ARC11,
1591 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_2] = CPU_ID_TPC_QMAN_ARC11,
1592 [GAUDI2_QUEUE_ID_DCORE1_TPC_5_3] = CPU_ID_TPC_QMAN_ARC11,
1593 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC4,
1594 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC4,
1595 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC4,
1596 [GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC4,
1597 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC5,
1598 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC5,
1599 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC5,
1600 [GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC5,
1601 [GAUDI2_QUEUE_ID_DCORE2_MME_0_0] = CPU_ID_MME_QMAN_ARC1,
1602 [GAUDI2_QUEUE_ID_DCORE2_MME_0_1] = CPU_ID_MME_QMAN_ARC1,
1603 [GAUDI2_QUEUE_ID_DCORE2_MME_0_2] = CPU_ID_MME_QMAN_ARC1,
1604 [GAUDI2_QUEUE_ID_DCORE2_MME_0_3] = CPU_ID_MME_QMAN_ARC1,
1605 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_0] = CPU_ID_TPC_QMAN_ARC12,
1606 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_1] = CPU_ID_TPC_QMAN_ARC12,
1607 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_2] = CPU_ID_TPC_QMAN_ARC12,
1608 [GAUDI2_QUEUE_ID_DCORE2_TPC_0_3] = CPU_ID_TPC_QMAN_ARC12,
1609 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_0] = CPU_ID_TPC_QMAN_ARC13,
1610 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_1] = CPU_ID_TPC_QMAN_ARC13,
1611 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_2] = CPU_ID_TPC_QMAN_ARC13,
1612 [GAUDI2_QUEUE_ID_DCORE2_TPC_1_3] = CPU_ID_TPC_QMAN_ARC13,
1613 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_0] = CPU_ID_TPC_QMAN_ARC14,
1614 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_1] = CPU_ID_TPC_QMAN_ARC14,
1615 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_2] = CPU_ID_TPC_QMAN_ARC14,
1616 [GAUDI2_QUEUE_ID_DCORE2_TPC_2_3] = CPU_ID_TPC_QMAN_ARC14,
1617 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_0] = CPU_ID_TPC_QMAN_ARC15,
1618 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_1] = CPU_ID_TPC_QMAN_ARC15,
1619 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_2] = CPU_ID_TPC_QMAN_ARC15,
1620 [GAUDI2_QUEUE_ID_DCORE2_TPC_3_3] = CPU_ID_TPC_QMAN_ARC15,
1621 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_0] = CPU_ID_TPC_QMAN_ARC16,
1622 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_1] = CPU_ID_TPC_QMAN_ARC16,
1623 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_2] = CPU_ID_TPC_QMAN_ARC16,
1624 [GAUDI2_QUEUE_ID_DCORE2_TPC_4_3] = CPU_ID_TPC_QMAN_ARC16,
1625 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_0] = CPU_ID_TPC_QMAN_ARC17,
1626 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_1] = CPU_ID_TPC_QMAN_ARC17,
1627 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_2] = CPU_ID_TPC_QMAN_ARC17,
1628 [GAUDI2_QUEUE_ID_DCORE2_TPC_5_3] = CPU_ID_TPC_QMAN_ARC17,
1629 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0] = CPU_ID_EDMA_QMAN_ARC6,
1630 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1] = CPU_ID_EDMA_QMAN_ARC6,
1631 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2] = CPU_ID_EDMA_QMAN_ARC6,
1632 [GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3] = CPU_ID_EDMA_QMAN_ARC6,
1633 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0] = CPU_ID_EDMA_QMAN_ARC7,
1634 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1] = CPU_ID_EDMA_QMAN_ARC7,
1635 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2] = CPU_ID_EDMA_QMAN_ARC7,
1636 [GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3] = CPU_ID_EDMA_QMAN_ARC7,
1637 [GAUDI2_QUEUE_ID_DCORE3_MME_0_0] = CPU_ID_SCHED_ARC5,
1638 [GAUDI2_QUEUE_ID_DCORE3_MME_0_1] = CPU_ID_SCHED_ARC5,
1639 [GAUDI2_QUEUE_ID_DCORE3_MME_0_2] = CPU_ID_SCHED_ARC5,
1640 [GAUDI2_QUEUE_ID_DCORE3_MME_0_3] = CPU_ID_SCHED_ARC5,
1641 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_0] = CPU_ID_TPC_QMAN_ARC18,
1642 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_1] = CPU_ID_TPC_QMAN_ARC18,
1643 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_2] = CPU_ID_TPC_QMAN_ARC18,
1644 [GAUDI2_QUEUE_ID_DCORE3_TPC_0_3] = CPU_ID_TPC_QMAN_ARC18,
1645 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_0] = CPU_ID_TPC_QMAN_ARC19,
1646 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_1] = CPU_ID_TPC_QMAN_ARC19,
1647 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_2] = CPU_ID_TPC_QMAN_ARC19,
1648 [GAUDI2_QUEUE_ID_DCORE3_TPC_1_3] = CPU_ID_TPC_QMAN_ARC19,
1649 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_0] = CPU_ID_TPC_QMAN_ARC20,
1650 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_1] = CPU_ID_TPC_QMAN_ARC20,
1651 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_2] = CPU_ID_TPC_QMAN_ARC20,
1652 [GAUDI2_QUEUE_ID_DCORE3_TPC_2_3] = CPU_ID_TPC_QMAN_ARC20,
1653 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_0] = CPU_ID_TPC_QMAN_ARC21,
1654 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_1] = CPU_ID_TPC_QMAN_ARC21,
1655 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_2] = CPU_ID_TPC_QMAN_ARC21,
1656 [GAUDI2_QUEUE_ID_DCORE3_TPC_3_3] = CPU_ID_TPC_QMAN_ARC21,
1657 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_0] = CPU_ID_TPC_QMAN_ARC22,
1658 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_1] = CPU_ID_TPC_QMAN_ARC22,
1659 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_2] = CPU_ID_TPC_QMAN_ARC22,
1660 [GAUDI2_QUEUE_ID_DCORE3_TPC_4_3] = CPU_ID_TPC_QMAN_ARC22,
1661 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_0] = CPU_ID_TPC_QMAN_ARC23,
1662 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_1] = CPU_ID_TPC_QMAN_ARC23,
1663 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_2] = CPU_ID_TPC_QMAN_ARC23,
1664 [GAUDI2_QUEUE_ID_DCORE3_TPC_5_3] = CPU_ID_TPC_QMAN_ARC23,
1665 [GAUDI2_QUEUE_ID_NIC_0_0] = CPU_ID_NIC_QMAN_ARC0,
1666 [GAUDI2_QUEUE_ID_NIC_0_1] = CPU_ID_NIC_QMAN_ARC0,
1667 [GAUDI2_QUEUE_ID_NIC_0_2] = CPU_ID_NIC_QMAN_ARC0,
1668 [GAUDI2_QUEUE_ID_NIC_0_3] = CPU_ID_NIC_QMAN_ARC0,
1669 [GAUDI2_QUEUE_ID_NIC_1_0] = CPU_ID_NIC_QMAN_ARC1,
1670 [GAUDI2_QUEUE_ID_NIC_1_1] = CPU_ID_NIC_QMAN_ARC1,
1671 [GAUDI2_QUEUE_ID_NIC_1_2] = CPU_ID_NIC_QMAN_ARC1,
1672 [GAUDI2_QUEUE_ID_NIC_1_3] = CPU_ID_NIC_QMAN_ARC1,
1673 [GAUDI2_QUEUE_ID_NIC_2_0] = CPU_ID_NIC_QMAN_ARC2,
1674 [GAUDI2_QUEUE_ID_NIC_2_1] = CPU_ID_NIC_QMAN_ARC2,
1675 [GAUDI2_QUEUE_ID_NIC_2_2] = CPU_ID_NIC_QMAN_ARC2,
1676 [GAUDI2_QUEUE_ID_NIC_2_3] = CPU_ID_NIC_QMAN_ARC2,
1677 [GAUDI2_QUEUE_ID_NIC_3_0] = CPU_ID_NIC_QMAN_ARC3,
1678 [GAUDI2_QUEUE_ID_NIC_3_1] = CPU_ID_NIC_QMAN_ARC3,
1679 [GAUDI2_QUEUE_ID_NIC_3_2] = CPU_ID_NIC_QMAN_ARC3,
1680 [GAUDI2_QUEUE_ID_NIC_3_3] = CPU_ID_NIC_QMAN_ARC3,
1681 [GAUDI2_QUEUE_ID_NIC_4_0] = CPU_ID_NIC_QMAN_ARC4,
1682 [GAUDI2_QUEUE_ID_NIC_4_1] = CPU_ID_NIC_QMAN_ARC4,
1683 [GAUDI2_QUEUE_ID_NIC_4_2] = CPU_ID_NIC_QMAN_ARC4,
1684 [GAUDI2_QUEUE_ID_NIC_4_3] = CPU_ID_NIC_QMAN_ARC4,
1685 [GAUDI2_QUEUE_ID_NIC_5_0] = CPU_ID_NIC_QMAN_ARC5,
1686 [GAUDI2_QUEUE_ID_NIC_5_1] = CPU_ID_NIC_QMAN_ARC5,
1687 [GAUDI2_QUEUE_ID_NIC_5_2] = CPU_ID_NIC_QMAN_ARC5,
1688 [GAUDI2_QUEUE_ID_NIC_5_3] = CPU_ID_NIC_QMAN_ARC5,
1689 [GAUDI2_QUEUE_ID_NIC_6_0] = CPU_ID_NIC_QMAN_ARC6,
1690 [GAUDI2_QUEUE_ID_NIC_6_1] = CPU_ID_NIC_QMAN_ARC6,
1691 [GAUDI2_QUEUE_ID_NIC_6_2] = CPU_ID_NIC_QMAN_ARC6,
1692 [GAUDI2_QUEUE_ID_NIC_6_3] = CPU_ID_NIC_QMAN_ARC6,
1693 [GAUDI2_QUEUE_ID_NIC_7_0] = CPU_ID_NIC_QMAN_ARC7,
1694 [GAUDI2_QUEUE_ID_NIC_7_1] = CPU_ID_NIC_QMAN_ARC7,
1695 [GAUDI2_QUEUE_ID_NIC_7_2] = CPU_ID_NIC_QMAN_ARC7,
1696 [GAUDI2_QUEUE_ID_NIC_7_3] = CPU_ID_NIC_QMAN_ARC7,
1697 [GAUDI2_QUEUE_ID_NIC_8_0] = CPU_ID_NIC_QMAN_ARC8,
1698 [GAUDI2_QUEUE_ID_NIC_8_1] = CPU_ID_NIC_QMAN_ARC8,
1699 [GAUDI2_QUEUE_ID_NIC_8_2] = CPU_ID_NIC_QMAN_ARC8,
1700 [GAUDI2_QUEUE_ID_NIC_8_3] = CPU_ID_NIC_QMAN_ARC8,
1701 [GAUDI2_QUEUE_ID_NIC_9_0] = CPU_ID_NIC_QMAN_ARC9,
1702 [GAUDI2_QUEUE_ID_NIC_9_1] = CPU_ID_NIC_QMAN_ARC9,
1703 [GAUDI2_QUEUE_ID_NIC_9_2] = CPU_ID_NIC_QMAN_ARC9,
1704 [GAUDI2_QUEUE_ID_NIC_9_3] = CPU_ID_NIC_QMAN_ARC9,
1705 [GAUDI2_QUEUE_ID_NIC_10_0] = CPU_ID_NIC_QMAN_ARC10,
1706 [GAUDI2_QUEUE_ID_NIC_10_1] = CPU_ID_NIC_QMAN_ARC10,
1707 [GAUDI2_QUEUE_ID_NIC_10_2] = CPU_ID_NIC_QMAN_ARC10,
1708 [GAUDI2_QUEUE_ID_NIC_10_3] = CPU_ID_NIC_QMAN_ARC10,
1709 [GAUDI2_QUEUE_ID_NIC_11_0] = CPU_ID_NIC_QMAN_ARC11,
1710 [GAUDI2_QUEUE_ID_NIC_11_1] = CPU_ID_NIC_QMAN_ARC11,
1711 [GAUDI2_QUEUE_ID_NIC_11_2] = CPU_ID_NIC_QMAN_ARC11,
1712 [GAUDI2_QUEUE_ID_NIC_11_3] = CPU_ID_NIC_QMAN_ARC11,
1713 [GAUDI2_QUEUE_ID_NIC_12_0] = CPU_ID_NIC_QMAN_ARC12,
1714 [GAUDI2_QUEUE_ID_NIC_12_1] = CPU_ID_NIC_QMAN_ARC12,
1715 [GAUDI2_QUEUE_ID_NIC_12_2] = CPU_ID_NIC_QMAN_ARC12,
1716 [GAUDI2_QUEUE_ID_NIC_12_3] = CPU_ID_NIC_QMAN_ARC12,
1717 [GAUDI2_QUEUE_ID_NIC_13_0] = CPU_ID_NIC_QMAN_ARC13,
1718 [GAUDI2_QUEUE_ID_NIC_13_1] = CPU_ID_NIC_QMAN_ARC13,
1719 [GAUDI2_QUEUE_ID_NIC_13_2] = CPU_ID_NIC_QMAN_ARC13,
1720 [GAUDI2_QUEUE_ID_NIC_13_3] = CPU_ID_NIC_QMAN_ARC13,
1721 [GAUDI2_QUEUE_ID_NIC_14_0] = CPU_ID_NIC_QMAN_ARC14,
1722 [GAUDI2_QUEUE_ID_NIC_14_1] = CPU_ID_NIC_QMAN_ARC14,
1723 [GAUDI2_QUEUE_ID_NIC_14_2] = CPU_ID_NIC_QMAN_ARC14,
1724 [GAUDI2_QUEUE_ID_NIC_14_3] = CPU_ID_NIC_QMAN_ARC14,
1725 [GAUDI2_QUEUE_ID_NIC_15_0] = CPU_ID_NIC_QMAN_ARC15,
1726 [GAUDI2_QUEUE_ID_NIC_15_1] = CPU_ID_NIC_QMAN_ARC15,
1727 [GAUDI2_QUEUE_ID_NIC_15_2] = CPU_ID_NIC_QMAN_ARC15,
1728 [GAUDI2_QUEUE_ID_NIC_15_3] = CPU_ID_NIC_QMAN_ARC15,
1729 [GAUDI2_QUEUE_ID_NIC_16_0] = CPU_ID_NIC_QMAN_ARC16,
1730 [GAUDI2_QUEUE_ID_NIC_16_1] = CPU_ID_NIC_QMAN_ARC16,
1731 [GAUDI2_QUEUE_ID_NIC_16_2] = CPU_ID_NIC_QMAN_ARC16,
1732 [GAUDI2_QUEUE_ID_NIC_16_3] = CPU_ID_NIC_QMAN_ARC16,
1733 [GAUDI2_QUEUE_ID_NIC_17_0] = CPU_ID_NIC_QMAN_ARC17,
1734 [GAUDI2_QUEUE_ID_NIC_17_1] = CPU_ID_NIC_QMAN_ARC17,
1735 [GAUDI2_QUEUE_ID_NIC_17_2] = CPU_ID_NIC_QMAN_ARC17,
1736 [GAUDI2_QUEUE_ID_NIC_17_3] = CPU_ID_NIC_QMAN_ARC17,
1737 [GAUDI2_QUEUE_ID_NIC_18_0] = CPU_ID_NIC_QMAN_ARC18,
1738 [GAUDI2_QUEUE_ID_NIC_18_1] = CPU_ID_NIC_QMAN_ARC18,
1739 [GAUDI2_QUEUE_ID_NIC_18_2] = CPU_ID_NIC_QMAN_ARC18,
1740 [GAUDI2_QUEUE_ID_NIC_18_3] = CPU_ID_NIC_QMAN_ARC18,
1741 [GAUDI2_QUEUE_ID_NIC_19_0] = CPU_ID_NIC_QMAN_ARC19,
1742 [GAUDI2_QUEUE_ID_NIC_19_1] = CPU_ID_NIC_QMAN_ARC19,
1743 [GAUDI2_QUEUE_ID_NIC_19_2] = CPU_ID_NIC_QMAN_ARC19,
1744 [GAUDI2_QUEUE_ID_NIC_19_3] = CPU_ID_NIC_QMAN_ARC19,
1745 [GAUDI2_QUEUE_ID_NIC_20_0] = CPU_ID_NIC_QMAN_ARC20,
1746 [GAUDI2_QUEUE_ID_NIC_20_1] = CPU_ID_NIC_QMAN_ARC20,
1747 [GAUDI2_QUEUE_ID_NIC_20_2] = CPU_ID_NIC_QMAN_ARC20,
1748 [GAUDI2_QUEUE_ID_NIC_20_3] = CPU_ID_NIC_QMAN_ARC20,
1749 [GAUDI2_QUEUE_ID_NIC_21_0] = CPU_ID_NIC_QMAN_ARC21,
1750 [GAUDI2_QUEUE_ID_NIC_21_1] = CPU_ID_NIC_QMAN_ARC21,
1751 [GAUDI2_QUEUE_ID_NIC_21_2] = CPU_ID_NIC_QMAN_ARC21,
1752 [GAUDI2_QUEUE_ID_NIC_21_3] = CPU_ID_NIC_QMAN_ARC21,
1753 [GAUDI2_QUEUE_ID_NIC_22_0] = CPU_ID_NIC_QMAN_ARC22,
1754 [GAUDI2_QUEUE_ID_NIC_22_1] = CPU_ID_NIC_QMAN_ARC22,
1755 [GAUDI2_QUEUE_ID_NIC_22_2] = CPU_ID_NIC_QMAN_ARC22,
1756 [GAUDI2_QUEUE_ID_NIC_22_3] = CPU_ID_NIC_QMAN_ARC22,
1757 [GAUDI2_QUEUE_ID_NIC_23_0] = CPU_ID_NIC_QMAN_ARC23,
1758 [GAUDI2_QUEUE_ID_NIC_23_1] = CPU_ID_NIC_QMAN_ARC23,
1759 [GAUDI2_QUEUE_ID_NIC_23_2] = CPU_ID_NIC_QMAN_ARC23,
1760 [GAUDI2_QUEUE_ID_NIC_23_3] = CPU_ID_NIC_QMAN_ARC23,
1761 [GAUDI2_QUEUE_ID_ROT_0_0] = CPU_ID_ROT_QMAN_ARC0,
1762 [GAUDI2_QUEUE_ID_ROT_0_1] = CPU_ID_ROT_QMAN_ARC0,
1763 [GAUDI2_QUEUE_ID_ROT_0_2] = CPU_ID_ROT_QMAN_ARC0,
1764 [GAUDI2_QUEUE_ID_ROT_0_3] = CPU_ID_ROT_QMAN_ARC0,
1765 [GAUDI2_QUEUE_ID_ROT_1_0] = CPU_ID_ROT_QMAN_ARC1,
1766 [GAUDI2_QUEUE_ID_ROT_1_1] = CPU_ID_ROT_QMAN_ARC1,
1767 [GAUDI2_QUEUE_ID_ROT_1_2] = CPU_ID_ROT_QMAN_ARC1,
1768 [GAUDI2_QUEUE_ID_ROT_1_3] = CPU_ID_ROT_QMAN_ARC1
1769 };
1770
1771 const u32 gaudi2_dma_core_blocks_bases[DMA_CORE_ID_SIZE] = {
1772 [DMA_CORE_ID_PDMA0] = mmPDMA0_CORE_BASE,
1773 [DMA_CORE_ID_PDMA1] = mmPDMA1_CORE_BASE,
1774 [DMA_CORE_ID_EDMA0] = mmDCORE0_EDMA0_CORE_BASE,
1775 [DMA_CORE_ID_EDMA1] = mmDCORE0_EDMA1_CORE_BASE,
1776 [DMA_CORE_ID_EDMA2] = mmDCORE1_EDMA0_CORE_BASE,
1777 [DMA_CORE_ID_EDMA3] = mmDCORE1_EDMA1_CORE_BASE,
1778 [DMA_CORE_ID_EDMA4] = mmDCORE2_EDMA0_CORE_BASE,
1779 [DMA_CORE_ID_EDMA5] = mmDCORE2_EDMA1_CORE_BASE,
1780 [DMA_CORE_ID_EDMA6] = mmDCORE3_EDMA0_CORE_BASE,
1781 [DMA_CORE_ID_EDMA7] = mmDCORE3_EDMA1_CORE_BASE,
1782 [DMA_CORE_ID_KDMA] = mmARC_FARM_KDMA_BASE
1783 };
1784
1785 const u32 gaudi2_mme_acc_blocks_bases[MME_ID_SIZE] = {
1786 [MME_ID_DCORE0] = mmDCORE0_MME_ACC_BASE,
1787 [MME_ID_DCORE1] = mmDCORE1_MME_ACC_BASE,
1788 [MME_ID_DCORE2] = mmDCORE2_MME_ACC_BASE,
1789 [MME_ID_DCORE3] = mmDCORE3_MME_ACC_BASE
1790 };
1791
1792 static const u32 gaudi2_tpc_cfg_blocks_bases[TPC_ID_SIZE] = {
1793 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_CFG_BASE,
1794 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_CFG_BASE,
1795 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_CFG_BASE,
1796 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_CFG_BASE,
1797 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_CFG_BASE,
1798 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_CFG_BASE,
1799 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_CFG_BASE,
1800 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_CFG_BASE,
1801 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_CFG_BASE,
1802 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_CFG_BASE,
1803 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_CFG_BASE,
1804 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_CFG_BASE,
1805 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_CFG_BASE,
1806 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_CFG_BASE,
1807 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_CFG_BASE,
1808 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_CFG_BASE,
1809 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_CFG_BASE,
1810 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_CFG_BASE,
1811 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_CFG_BASE,
1812 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_CFG_BASE,
1813 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_CFG_BASE,
1814 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_CFG_BASE,
1815 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_CFG_BASE,
1816 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_CFG_BASE,
1817 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_CFG_BASE,
1818 };
1819
1820 static const u32 gaudi2_tpc_eml_cfg_blocks_bases[TPC_ID_SIZE] = {
1821 [TPC_ID_DCORE0_TPC0] = mmDCORE0_TPC0_EML_CFG_BASE,
1822 [TPC_ID_DCORE0_TPC1] = mmDCORE0_TPC1_EML_CFG_BASE,
1823 [TPC_ID_DCORE0_TPC2] = mmDCORE0_TPC2_EML_CFG_BASE,
1824 [TPC_ID_DCORE0_TPC3] = mmDCORE0_TPC3_EML_CFG_BASE,
1825 [TPC_ID_DCORE0_TPC4] = mmDCORE0_TPC4_EML_CFG_BASE,
1826 [TPC_ID_DCORE0_TPC5] = mmDCORE0_TPC5_EML_CFG_BASE,
1827 [TPC_ID_DCORE1_TPC0] = mmDCORE1_TPC0_EML_CFG_BASE,
1828 [TPC_ID_DCORE1_TPC1] = mmDCORE1_TPC1_EML_CFG_BASE,
1829 [TPC_ID_DCORE1_TPC2] = mmDCORE1_TPC2_EML_CFG_BASE,
1830 [TPC_ID_DCORE1_TPC3] = mmDCORE1_TPC3_EML_CFG_BASE,
1831 [TPC_ID_DCORE1_TPC4] = mmDCORE1_TPC4_EML_CFG_BASE,
1832 [TPC_ID_DCORE1_TPC5] = mmDCORE1_TPC5_EML_CFG_BASE,
1833 [TPC_ID_DCORE2_TPC0] = mmDCORE2_TPC0_EML_CFG_BASE,
1834 [TPC_ID_DCORE2_TPC1] = mmDCORE2_TPC1_EML_CFG_BASE,
1835 [TPC_ID_DCORE2_TPC2] = mmDCORE2_TPC2_EML_CFG_BASE,
1836 [TPC_ID_DCORE2_TPC3] = mmDCORE2_TPC3_EML_CFG_BASE,
1837 [TPC_ID_DCORE2_TPC4] = mmDCORE2_TPC4_EML_CFG_BASE,
1838 [TPC_ID_DCORE2_TPC5] = mmDCORE2_TPC5_EML_CFG_BASE,
1839 [TPC_ID_DCORE3_TPC0] = mmDCORE3_TPC0_EML_CFG_BASE,
1840 [TPC_ID_DCORE3_TPC1] = mmDCORE3_TPC1_EML_CFG_BASE,
1841 [TPC_ID_DCORE3_TPC2] = mmDCORE3_TPC2_EML_CFG_BASE,
1842 [TPC_ID_DCORE3_TPC3] = mmDCORE3_TPC3_EML_CFG_BASE,
1843 [TPC_ID_DCORE3_TPC4] = mmDCORE3_TPC4_EML_CFG_BASE,
1844 [TPC_ID_DCORE3_TPC5] = mmDCORE3_TPC5_EML_CFG_BASE,
1845 [TPC_ID_DCORE0_TPC6] = mmDCORE0_TPC6_EML_CFG_BASE,
1846 };
1847
1848 const u32 gaudi2_rot_blocks_bases[ROTATOR_ID_SIZE] = {
1849 [ROTATOR_ID_0] = mmROT0_BASE,
1850 [ROTATOR_ID_1] = mmROT1_BASE
1851 };
1852
1853 static const u32 gaudi2_tpc_id_to_queue_id[TPC_ID_SIZE] = {
1854 [TPC_ID_DCORE0_TPC0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0,
1855 [TPC_ID_DCORE0_TPC1] = GAUDI2_QUEUE_ID_DCORE0_TPC_1_0,
1856 [TPC_ID_DCORE0_TPC2] = GAUDI2_QUEUE_ID_DCORE0_TPC_2_0,
1857 [TPC_ID_DCORE0_TPC3] = GAUDI2_QUEUE_ID_DCORE0_TPC_3_0,
1858 [TPC_ID_DCORE0_TPC4] = GAUDI2_QUEUE_ID_DCORE0_TPC_4_0,
1859 [TPC_ID_DCORE0_TPC5] = GAUDI2_QUEUE_ID_DCORE0_TPC_5_0,
1860 [TPC_ID_DCORE1_TPC0] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0,
1861 [TPC_ID_DCORE1_TPC1] = GAUDI2_QUEUE_ID_DCORE1_TPC_1_0,
1862 [TPC_ID_DCORE1_TPC2] = GAUDI2_QUEUE_ID_DCORE1_TPC_2_0,
1863 [TPC_ID_DCORE1_TPC3] = GAUDI2_QUEUE_ID_DCORE1_TPC_3_0,
1864 [TPC_ID_DCORE1_TPC4] = GAUDI2_QUEUE_ID_DCORE1_TPC_4_0,
1865 [TPC_ID_DCORE1_TPC5] = GAUDI2_QUEUE_ID_DCORE1_TPC_5_0,
1866 [TPC_ID_DCORE2_TPC0] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0,
1867 [TPC_ID_DCORE2_TPC1] = GAUDI2_QUEUE_ID_DCORE2_TPC_1_0,
1868 [TPC_ID_DCORE2_TPC2] = GAUDI2_QUEUE_ID_DCORE2_TPC_2_0,
1869 [TPC_ID_DCORE2_TPC3] = GAUDI2_QUEUE_ID_DCORE2_TPC_3_0,
1870 [TPC_ID_DCORE2_TPC4] = GAUDI2_QUEUE_ID_DCORE2_TPC_4_0,
1871 [TPC_ID_DCORE2_TPC5] = GAUDI2_QUEUE_ID_DCORE2_TPC_5_0,
1872 [TPC_ID_DCORE3_TPC0] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0,
1873 [TPC_ID_DCORE3_TPC1] = GAUDI2_QUEUE_ID_DCORE3_TPC_1_0,
1874 [TPC_ID_DCORE3_TPC2] = GAUDI2_QUEUE_ID_DCORE3_TPC_2_0,
1875 [TPC_ID_DCORE3_TPC3] = GAUDI2_QUEUE_ID_DCORE3_TPC_3_0,
1876 [TPC_ID_DCORE3_TPC4] = GAUDI2_QUEUE_ID_DCORE3_TPC_4_0,
1877 [TPC_ID_DCORE3_TPC5] = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0,
1878 [TPC_ID_DCORE0_TPC6] = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0,
1879 };
1880
1881 static const u32 gaudi2_rot_id_to_queue_id[ROTATOR_ID_SIZE] = {
1882 [ROTATOR_ID_0] = GAUDI2_QUEUE_ID_ROT_0_0,
1883 [ROTATOR_ID_1] = GAUDI2_QUEUE_ID_ROT_1_0,
1884 };
1885
1886 static const u32 gaudi2_tpc_engine_id_to_tpc_id[] = {
1887 [GAUDI2_DCORE0_ENGINE_ID_TPC_0] = TPC_ID_DCORE0_TPC0,
1888 [GAUDI2_DCORE0_ENGINE_ID_TPC_1] = TPC_ID_DCORE0_TPC1,
1889 [GAUDI2_DCORE0_ENGINE_ID_TPC_2] = TPC_ID_DCORE0_TPC2,
1890 [GAUDI2_DCORE0_ENGINE_ID_TPC_3] = TPC_ID_DCORE0_TPC3,
1891 [GAUDI2_DCORE0_ENGINE_ID_TPC_4] = TPC_ID_DCORE0_TPC4,
1892 [GAUDI2_DCORE0_ENGINE_ID_TPC_5] = TPC_ID_DCORE0_TPC5,
1893 [GAUDI2_DCORE1_ENGINE_ID_TPC_0] = TPC_ID_DCORE1_TPC0,
1894 [GAUDI2_DCORE1_ENGINE_ID_TPC_1] = TPC_ID_DCORE1_TPC1,
1895 [GAUDI2_DCORE1_ENGINE_ID_TPC_2] = TPC_ID_DCORE1_TPC2,
1896 [GAUDI2_DCORE1_ENGINE_ID_TPC_3] = TPC_ID_DCORE1_TPC3,
1897 [GAUDI2_DCORE1_ENGINE_ID_TPC_4] = TPC_ID_DCORE1_TPC4,
1898 [GAUDI2_DCORE1_ENGINE_ID_TPC_5] = TPC_ID_DCORE1_TPC5,
1899 [GAUDI2_DCORE2_ENGINE_ID_TPC_0] = TPC_ID_DCORE2_TPC0,
1900 [GAUDI2_DCORE2_ENGINE_ID_TPC_1] = TPC_ID_DCORE2_TPC1,
1901 [GAUDI2_DCORE2_ENGINE_ID_TPC_2] = TPC_ID_DCORE2_TPC2,
1902 [GAUDI2_DCORE2_ENGINE_ID_TPC_3] = TPC_ID_DCORE2_TPC3,
1903 [GAUDI2_DCORE2_ENGINE_ID_TPC_4] = TPC_ID_DCORE2_TPC4,
1904 [GAUDI2_DCORE2_ENGINE_ID_TPC_5] = TPC_ID_DCORE2_TPC5,
1905 [GAUDI2_DCORE3_ENGINE_ID_TPC_0] = TPC_ID_DCORE3_TPC0,
1906 [GAUDI2_DCORE3_ENGINE_ID_TPC_1] = TPC_ID_DCORE3_TPC1,
1907 [GAUDI2_DCORE3_ENGINE_ID_TPC_2] = TPC_ID_DCORE3_TPC2,
1908 [GAUDI2_DCORE3_ENGINE_ID_TPC_3] = TPC_ID_DCORE3_TPC3,
1909 [GAUDI2_DCORE3_ENGINE_ID_TPC_4] = TPC_ID_DCORE3_TPC4,
1910 [GAUDI2_DCORE3_ENGINE_ID_TPC_5] = TPC_ID_DCORE3_TPC5,
1911 /* the PCI TPC is placed last (mapped like HW) */
1912 [GAUDI2_DCORE0_ENGINE_ID_TPC_6] = TPC_ID_DCORE0_TPC6,
1913 };
1914
1915 static const u32 gaudi2_mme_engine_id_to_mme_id[] = {
1916 [GAUDI2_DCORE0_ENGINE_ID_MME] = MME_ID_DCORE0,
1917 [GAUDI2_DCORE1_ENGINE_ID_MME] = MME_ID_DCORE1,
1918 [GAUDI2_DCORE2_ENGINE_ID_MME] = MME_ID_DCORE2,
1919 [GAUDI2_DCORE3_ENGINE_ID_MME] = MME_ID_DCORE3,
1920 };
1921
1922 static const u32 gaudi2_edma_engine_id_to_edma_id[] = {
1923 [GAUDI2_ENGINE_ID_PDMA_0] = DMA_CORE_ID_PDMA0,
1924 [GAUDI2_ENGINE_ID_PDMA_1] = DMA_CORE_ID_PDMA1,
1925 [GAUDI2_DCORE0_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA0,
1926 [GAUDI2_DCORE0_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA1,
1927 [GAUDI2_DCORE1_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA2,
1928 [GAUDI2_DCORE1_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA3,
1929 [GAUDI2_DCORE2_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA4,
1930 [GAUDI2_DCORE2_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA5,
1931 [GAUDI2_DCORE3_ENGINE_ID_EDMA_0] = DMA_CORE_ID_EDMA6,
1932 [GAUDI2_DCORE3_ENGINE_ID_EDMA_1] = DMA_CORE_ID_EDMA7,
1933 [GAUDI2_ENGINE_ID_KDMA] = DMA_CORE_ID_KDMA,
1934 };
1935
1936 const u32 edma_stream_base[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
1937 GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
1938 GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0,
1939 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
1940 GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0,
1941 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
1942 GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0,
1943 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0,
1944 GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0,
1945 };
1946
1947 static const char gaudi2_vdec_irq_name[GAUDI2_VDEC_MSIX_ENTRIES][GAUDI2_MAX_STRING_LEN] = {
1948 "gaudi2 vdec 0_0", "gaudi2 vdec 0_0 abnormal",
1949 "gaudi2 vdec 0_1", "gaudi2 vdec 0_1 abnormal",
1950 "gaudi2 vdec 1_0", "gaudi2 vdec 1_0 abnormal",
1951 "gaudi2 vdec 1_1", "gaudi2 vdec 1_1 abnormal",
1952 "gaudi2 vdec 2_0", "gaudi2 vdec 2_0 abnormal",
1953 "gaudi2 vdec 2_1", "gaudi2 vdec 2_1 abnormal",
1954 "gaudi2 vdec 3_0", "gaudi2 vdec 3_0 abnormal",
1955 "gaudi2 vdec 3_1", "gaudi2 vdec 3_1 abnormal",
1956 "gaudi2 vdec s_0", "gaudi2 vdec s_0 abnormal",
1957 "gaudi2 vdec s_1", "gaudi2 vdec s_1 abnormal"
1958 };
1959
1960 enum rtr_id {
1961 DCORE0_RTR0,
1962 DCORE0_RTR1,
1963 DCORE0_RTR2,
1964 DCORE0_RTR3,
1965 DCORE0_RTR4,
1966 DCORE0_RTR5,
1967 DCORE0_RTR6,
1968 DCORE0_RTR7,
1969 DCORE1_RTR0,
1970 DCORE1_RTR1,
1971 DCORE1_RTR2,
1972 DCORE1_RTR3,
1973 DCORE1_RTR4,
1974 DCORE1_RTR5,
1975 DCORE1_RTR6,
1976 DCORE1_RTR7,
1977 DCORE2_RTR0,
1978 DCORE2_RTR1,
1979 DCORE2_RTR2,
1980 DCORE2_RTR3,
1981 DCORE2_RTR4,
1982 DCORE2_RTR5,
1983 DCORE2_RTR6,
1984 DCORE2_RTR7,
1985 DCORE3_RTR0,
1986 DCORE3_RTR1,
1987 DCORE3_RTR2,
1988 DCORE3_RTR3,
1989 DCORE3_RTR4,
1990 DCORE3_RTR5,
1991 DCORE3_RTR6,
1992 DCORE3_RTR7,
1993 };
1994
1995 static const u32 gaudi2_tpc_initiator_hbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
1996 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3,
1997 DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4,
1998 DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1,
1999 DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6,
2000 DCORE0_RTR0
2001 };
2002
2003 static const u32 gaudi2_tpc_initiator_lbw_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = {
2004 DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2,
2005 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5,
2006 DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, DCORE2_RTR0, DCORE2_RTR0,
2007 DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, DCORE3_RTR7, DCORE3_RTR7,
2008 DCORE0_RTR0
2009 };
2010
2011 static const u32 gaudi2_dec_initiator_hbw_rtr_id[NUMBER_OF_DEC] = {
2012 DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0,
2013 DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0
2014 };
2015
2016 static const u32 gaudi2_dec_initiator_lbw_rtr_id[NUMBER_OF_DEC] = {
2017 DCORE0_RTR1, DCORE0_RTR1, DCORE1_RTR6, DCORE1_RTR6, DCORE2_RTR1, DCORE2_RTR1,
2018 DCORE3_RTR6, DCORE3_RTR6, DCORE0_RTR0, DCORE0_RTR0
2019 };
2020
2021 static const u32 gaudi2_nic_initiator_hbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
2022 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
2023 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
2024 };
2025
2026 static const u32 gaudi2_nic_initiator_lbw_rtr_id[NIC_NUMBER_OF_MACROS] = {
2027 DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0,
2028 DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7
2029 };
2030
2031 static const u32 gaudi2_edma_initiator_hbw_sft[NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES] = {
2032 mmSFT0_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2033 mmSFT0_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2034 mmSFT1_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2035 mmSFT1_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2036 mmSFT2_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2037 mmSFT2_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE,
2038 mmSFT3_HBW_RTR_IF0_MSTR_IF_RR_SHRD_HBW_BASE,
2039 mmSFT3_HBW_RTR_IF1_MSTR_IF_RR_SHRD_HBW_BASE
2040 };
2041
2042 static const u32 gaudi2_pdma_initiator_hbw_rtr_id[NUM_OF_PDMA] = {
2043 DCORE0_RTR0, DCORE0_RTR0
2044 };
2045
2046 static const u32 gaudi2_pdma_initiator_lbw_rtr_id[NUM_OF_PDMA] = {
2047 DCORE0_RTR2, DCORE0_RTR2
2048 };
2049
2050 static const u32 gaudi2_rot_initiator_hbw_rtr_id[NUM_OF_ROT] = {
2051 DCORE2_RTR0, DCORE3_RTR7
2052 };
2053
2054 static const u32 gaudi2_rot_initiator_lbw_rtr_id[NUM_OF_ROT] = {
2055 DCORE2_RTR2, DCORE3_RTR5
2056 };
2057
2058 struct mme_initiators_rtr_id {
2059 u32 wap0;
2060 u32 wap1;
2061 u32 write;
2062 u32 read;
2063 u32 sbte0;
2064 u32 sbte1;
2065 u32 sbte2;
2066 u32 sbte3;
2067 u32 sbte4;
2068 };
2069
2070 enum mme_initiators {
2071 MME_WAP0 = 0,
2072 MME_WAP1,
2073 MME_WRITE,
2074 MME_READ,
2075 MME_SBTE0,
2076 MME_SBTE1,
2077 MME_SBTE2,
2078 MME_SBTE3,
2079 MME_SBTE4,
2080 MME_INITIATORS_MAX
2081 };
2082
2083 static const struct mme_initiators_rtr_id
2084 gaudi2_mme_initiator_rtr_id[NUM_OF_MME_PER_DCORE * NUM_OF_DCORES] = {
2085 { .wap0 = 5, .wap1 = 7, .write = 6, .read = 7,
2086 .sbte0 = 7, .sbte1 = 4, .sbte2 = 4, .sbte3 = 5, .sbte4 = 6},
2087 { .wap0 = 10, .wap1 = 8, .write = 9, .read = 8,
2088 .sbte0 = 11, .sbte1 = 11, .sbte2 = 10, .sbte3 = 9, .sbte4 = 8},
2089 { .wap0 = 21, .wap1 = 23, .write = 22, .read = 23,
2090 .sbte0 = 20, .sbte1 = 20, .sbte2 = 21, .sbte3 = 22, .sbte4 = 23},
2091 { .wap0 = 30, .wap1 = 28, .write = 29, .read = 30,
2092 .sbte0 = 31, .sbte1 = 31, .sbte2 = 30, .sbte3 = 29, .sbte4 = 28},
2093 };
2094
2095 enum razwi_event_sources {
2096 RAZWI_TPC,
2097 RAZWI_MME,
2098 RAZWI_EDMA,
2099 RAZWI_PDMA,
2100 RAZWI_NIC,
2101 RAZWI_DEC,
2102 RAZWI_ROT,
2103 RAZWI_ARC_FARM
2104 };
2105
2106 struct hbm_mc_error_causes {
2107 u32 mask;
2108 char cause[50];
2109 };
2110
2111 static struct hl_special_block_info gaudi2_special_blocks[] = GAUDI2_SPECIAL_BLOCKS;
2112
2113 /* The special blocks iterator is currently used to configure security protection bits
2114 * and to read global errors. Most HW blocks are addressable; those that aren't (N/A)
2115 * must be skipped. The following configurations are commonly used for both PB config
2116 * and global error reading, since they currently share the same settings.
2117 * Once that changes, we must remember to use separate configurations for each flow.
2118 */
2119 static int gaudi2_iterator_skip_block_types[] = {
2120 GAUDI2_BLOCK_TYPE_PLL,
2121 GAUDI2_BLOCK_TYPE_EU_BIST,
2122 GAUDI2_BLOCK_TYPE_HBM,
2123 GAUDI2_BLOCK_TYPE_XFT
2124 };
2125
2126 static struct range gaudi2_iterator_skip_block_ranges[] = {
2127 /* Skip all PSOC blocks except for PSOC_GLOBAL_CONF */
2128 {mmPSOC_I2C_M0_BASE, mmPSOC_EFUSE_BASE},
2129 {mmPSOC_BTL_BASE, mmPSOC_MSTR_IF_RR_SHRD_HBW_BASE},
2130 /* Skip all CPU blocks except for CPU_IF */
2131 {mmCPU_CA53_CFG_BASE, mmCPU_CA53_CFG_BASE},
2132 {mmCPU_TIMESTAMP_BASE, mmCPU_MSTR_IF_RR_SHRD_HBW_BASE}
2133 };
2134
2135 static struct hbm_mc_error_causes hbm_mc_spi[GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE] = {
2136 {HBM_MC_SPI_TEMP_PIN_CHG_MASK, "temperature pins changed"},
2137 {HBM_MC_SPI_THR_ENG_MASK, "temperature-based throttling engaged"},
2138 {HBM_MC_SPI_THR_DIS_ENG_MASK, "temperature-based throttling disengaged"},
2139 {HBM_MC_SPI_IEEE1500_COMP_MASK, "IEEE1500 op comp"},
2140 {HBM_MC_SPI_IEEE1500_PAUSED_MASK, "IEEE1500 op paused"},
2141 };
2142
2143 static const char * const hbm_mc_sei_cause[GAUDI2_NUM_OF_HBM_SEI_CAUSE] = {
2144 [HBM_SEI_CMD_PARITY_EVEN] = "SEI C/A parity even",
2145 [HBM_SEI_CMD_PARITY_ODD] = "SEI C/A parity odd",
2146 [HBM_SEI_READ_ERR] = "SEI read data error",
2147 [HBM_SEI_WRITE_DATA_PARITY_ERR] = "SEI write data parity error",
2148 [HBM_SEI_CATTRIP] = "SEI CATTRIP asserted",
2149 [HBM_SEI_MEM_BIST_FAIL] = "SEI memory BIST fail",
2150 [HBM_SEI_DFI] = "SEI DFI error",
2151 [HBM_SEI_INV_TEMP_READ_OUT] = "SEI invalid temp read",
2152 [HBM_SEI_BIST_FAIL] = "SEI BIST fail"
2153 };
2154
2155 struct mmu_spi_sei_cause {
2156 char cause[50];
2157 int clear_bit;
2158 };
2159
2160 static const struct mmu_spi_sei_cause gaudi2_mmu_spi_sei[GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE] = {
2161 {"page fault", 1}, /* INTERRUPT_CLR[1] */
2162 {"page access", 1}, /* INTERRUPT_CLR[1] */
2163 {"bypass ddr", 2}, /* INTERRUPT_CLR[2] */
2164 {"multi hit", 2}, /* INTERRUPT_CLR[2] */
2165 {"mmu rei0", -1}, /* no clear register bit */
2166 {"mmu rei1", -1}, /* no clear register bit */
2167 {"stlb rei0", -1}, /* no clear register bit */
2168 {"stlb rei1", -1}, /* no clear register bit */
2169 {"rr privileged write hit", 2}, /* INTERRUPT_CLR[2] */
2170 {"rr privileged read hit", 2}, /* INTERRUPT_CLR[2] */
2171 {"rr secure write hit", 2}, /* INTERRUPT_CLR[2] */
2172 {"rr secure read hit", 2}, /* INTERRUPT_CLR[2] */
2173 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2174 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2175 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2176 {"bist_fail no use", 2}, /* INTERRUPT_CLR[2] */
2177 {"slave error", 16}, /* INTERRUPT_CLR[16] */
2178 {"dec error", 17}, /* INTERRUPT_CLR[17] */
2179 {"burst fifo full", 2} /* INTERRUPT_CLR[2] */
2180 };
2181
2182 struct gaudi2_cache_invld_params {
2183 u64 start_va;
2184 u64 end_va;
2185 u32 inv_start_val;
2186 u32 flags;
2187 bool range_invalidation;
2188 };
2189
2190 struct gaudi2_tpc_idle_data {
2191 struct engines_data *e;
2192 unsigned long *mask;
2193 bool *is_idle;
2194 const char *tpc_fmt;
2195 };
2196
2197 struct gaudi2_tpc_mmu_data {
2198 u32 rw_asid;
2199 };
2200
2201 static s64 gaudi2_state_dump_specs_props[SP_MAX] = {0};
2202
2203 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val);
2204 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id);
2205 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id);
2206 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2207 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id);
2208 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val);
2209 static int gaudi2_send_job_to_kdma(struct hl_device *hdev, u64 src_addr, u64 dst_addr, u32 size,
2210 bool is_memset);
2211 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2212 struct engines_data *e);
2213 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2214 struct engines_data *e);
2215 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
2216 struct engines_data *e);
2217 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr);
2218 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr);
2219
2220 static void gaudi2_init_scrambler_hbm(struct hl_device *hdev)
2221 {
2222
2223 }
2224
2225 static u32 gaudi2_get_signal_cb_size(struct hl_device *hdev)
2226 {
2227 return sizeof(struct packet_msg_short);
2228 }
2229
2230 static u32 gaudi2_get_wait_cb_size(struct hl_device *hdev)
2231 {
2232 return sizeof(struct packet_msg_short) * 4 + sizeof(struct packet_fence);
2233 }
2234
2235 void gaudi2_iterate_tpcs(struct hl_device *hdev, struct iterate_module_ctx *ctx)
2236 {
2237 struct asic_fixed_properties *prop = &hdev->asic_prop;
2238 int dcore, inst, tpc_seq;
2239 u32 offset;
2240
2241 /* init the return code */
2242 ctx->rc = 0;
2243
2244 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) {
2245 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) {
2246 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
2247
2248 if (!(prop->tpc_enabled_mask & BIT(tpc_seq)))
2249 continue;
2250
2251 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst);
2252
2253 ctx->fn(hdev, dcore, inst, offset, ctx);
2254 if (ctx->rc) {
2255 dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n",
2256 dcore, inst);
2257 return;
2258 }
2259 }
2260 }
2261
2262 if (!(prop->tpc_enabled_mask & BIT(TPC_ID_DCORE0_TPC6)))
2263 return;
2264
2265 /* special check for PCI TPC (DCORE0_TPC6) */
2266 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1);
2267 ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx);
2268 if (ctx->rc)
2269 dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n");
2270 }
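
/*
 * Illustrative usage sketch (hypothetical, not part of the driver): a caller
 * supplies a per-TPC callback through ctx->fn and inspects ctx->rc afterwards.
 * Only the fn/rc fields and the callback argument list are taken from the call
 * sites above; the function names below are made up for the example.
 */
#if 0	/* example only, not built */
static void example_tpc_cb(struct hl_device *hdev, int dcore, int inst, u32 offset,
				struct iterate_module_ctx *ctx)
{
	/* invoked once per enabled TPC; 'offset' addresses that TPC's block */
	ctx->rc = 0;
}

static int example_visit_all_tpcs(struct hl_device *hdev)
{
	struct iterate_module_ctx ctx = { .fn = example_tpc_cb };

	gaudi2_iterate_tpcs(hdev, &ctx);
	return ctx.rc;
}
#endif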
2271
2272 static bool gaudi2_host_phys_addr_valid(u64 addr)
2273 {
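/* valid if the address is below the end of the first host physical range or at/above the start of the second */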
2274 if ((addr < HOST_PHYS_BASE_0 + HOST_PHYS_SIZE_0) || (addr >= HOST_PHYS_BASE_1))
2275 return true;
2276
2277 return false;
2278 }
2279
2280 static int set_number_of_functional_hbms(struct hl_device *hdev)
2281 {
2282 struct asic_fixed_properties *prop = &hdev->asic_prop;
2283 u8 faulty_hbms = hweight64(hdev->dram_binning);
2284
2285 /* check if all HBMs should be used */
2286 if (!faulty_hbms) {
2287 dev_dbg(hdev->dev, "All HBMs are in use (no binning)\n");
2288 prop->num_functional_hbms = GAUDI2_HBM_NUM;
2289 return 0;
2290 }
2291
2292 /*
2293 * Check for the error condition in which the number of binning
2294 * candidates is higher than the maximum supported by the
2295 * driver (in which case the binning mask is ignored and the
2296 * driver sets the default)
2297 */
2298 if (faulty_hbms > MAX_FAULTY_HBMS) {
2299 dev_err(hdev->dev,
2300 "HBM binning supports max of %d faulty HBMs, supplied mask 0x%llx.\n",
2301 MAX_FAULTY_HBMS, hdev->dram_binning);
2302 return -EINVAL;
2303 }
2304
2305 /*
2306 * With binning in effect, the number of functional HBMs is GAUDI2_HBM_NUM
2307 * minus the number of faulty HBMs (GAUDI2_HBM_NUM - 1 in the default case).
2308 */
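/* e.g. (illustrative) dram_binning = 0x1 -> faulty_hbms = 1 -> GAUDI2_HBM_NUM - 1 functional HBMs */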
2309 prop->num_functional_hbms = GAUDI2_HBM_NUM - faulty_hbms;
2310 return 0;
2311 }
2312
2313 static bool gaudi2_is_edma_queue_id(u32 queue_id)
2314 {
2315
2316 switch (queue_id) {
2317 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
2318 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
2319 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
2320 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
2321 return true;
2322 default:
2323 return false;
2324 }
2325 }
2326
2327 static int gaudi2_set_dram_properties(struct hl_device *hdev)
2328 {
2329 struct asic_fixed_properties *prop = &hdev->asic_prop;
2330 u64 hbm_drv_base_offset = 0, edma_pq_base_addr;
2331 u32 basic_hbm_page_size, edma_idx = 0;
2332 int rc, i;
2333
2334 rc = set_number_of_functional_hbms(hdev);
2335 if (rc)
2336 return -EINVAL;
2337
2338 /*
2339 * Due to a HW bug in which the TLB size is x16 smaller than expected, we use a
2340 * workaround of a x16 bigger page size, so that the entire HBM mapping can still
2341 * be populated in the TLB
2342 */
2343 basic_hbm_page_size = prop->num_functional_hbms * SZ_8M;
2344 prop->dram_page_size = GAUDI2_COMPENSATE_TLB_PAGE_SIZE_FACTOR * basic_hbm_page_size;
2345 prop->device_mem_alloc_default_page_size = prop->dram_page_size;
2346 prop->dram_size = prop->num_functional_hbms * SZ_16G;
2347 prop->dram_base_address = DRAM_PHYS_BASE;
2348 prop->dram_end_address = prop->dram_base_address + prop->dram_size;
2349 prop->dram_supports_virtual_memory = true;
2350
2351 prop->dram_user_base_address = DRAM_PHYS_BASE + prop->dram_page_size;
2352 prop->dram_hints_align_mask = ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK;
2353 prop->hints_dram_reserved_va_range.start_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_START;
2354 prop->hints_dram_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HBM_END;
2355
2356 /* since the DRAM page size differs from the DMMU page size, we need to allocate
2357 * DRAM memory in units of dram_page_size and map this memory in
2358 * units of the DMMU page size. we overcome this size mismatch using a
2359 * scrambling routine which takes a DRAM page and converts it to a DMMU
2360 * page.
2361 * We therefore:
2362 * 1. partition the virtual address space into DRAM-page (whole) pages.
2363 * (suppose we get n such pages)
2364 * 2. limit the amount of virtual address space we got from 1 above to
2365 * a multiple of 64M as we don't want the scrambled address to cross
2366 * the DRAM virtual address space.
2367 * ( m = (n * DRAM_page_size) / DMMU_page_size).
2368 * 3. determine the end address accordingly:
2369 * end_addr = start_addr + m * 48M
2370 *
2371 * the DRAM address MSBs (63:48) are not part of the roundup calculation
2372 */
2373 prop->dmmu.start_addr = prop->dram_base_address +
2374 (prop->dram_page_size *
2375 DIV_ROUND_UP_SECTOR_T(prop->dram_size, prop->dram_page_size));
2376 prop->dmmu.end_addr = prop->dmmu.start_addr + prop->dram_page_size *
2377 div_u64((VA_HBM_SPACE_END - prop->dmmu.start_addr), prop->dmmu.page_size);
2378 /*
2379 * The driver can't share a (48MB) HBM page with the F/W. To prevent the F/W from
2380 * blocking the driver part with a range register, it must start at the next (48MB) page
2381 */
2382 hbm_drv_base_offset = roundup(CPU_FW_IMAGE_SIZE, prop->num_functional_hbms * SZ_8M);
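/*
 * For example (illustrative): if CPU_FW_IMAGE_SIZE fits within one basic HBM page of
 * num_functional_hbms * 8MB, the roundup above places hbm_drv_base_offset exactly at
 * the start of the second basic page.
 */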
2383
2384 /*
2385 * The NIC driver section and the HMMU page tables section in the HBM need
2386 * to fit in the remaining size of the first DRAM page after taking the
2387 * F/W image size into account
2388 */
2389
2390 /* Reserve region in HBM for HMMU page tables */
2391 prop->mmu_pgt_addr = DRAM_PHYS_BASE + hbm_drv_base_offset +
2392 ((prop->dram_page_size - hbm_drv_base_offset) -
2393 (HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE + EDMA_SCRATCHPAD_SIZE));
2394
2395 /* Set EDMA PQs HBM addresses */
2396 edma_pq_base_addr = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE;
2397
2398 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2399 if (gaudi2_is_edma_queue_id(i)) {
2400 prop->hw_queues_props[i].q_dram_bd_address = edma_pq_base_addr +
2401 (edma_idx * HL_QUEUE_SIZE_IN_BYTES);
2402 edma_idx++;
2403 }
2404 }
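/*
 * The EDMA PQs are laid out back-to-back right above the HMMU page tables: the first
 * enabled EDMA queue gets edma_pq_base_addr, the next one edma_pq_base_addr +
 * HL_QUEUE_SIZE_IN_BYTES, and so on.
 */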
2405
2406 return 0;
2407 }
2408
2409 static int gaudi2_set_fixed_properties(struct hl_device *hdev)
2410 {
2411 struct asic_fixed_properties *prop = &hdev->asic_prop;
2412 struct hw_queue_properties *q_props;
2413 u32 num_sync_stream_queues = 0;
2414 int i, rc;
2415
2416 prop->max_queues = GAUDI2_QUEUE_ID_SIZE;
2417 prop->hw_queues_props = kcalloc(prop->max_queues, sizeof(struct hw_queue_properties),
2418 GFP_KERNEL);
2419
2420 if (!prop->hw_queues_props)
2421 return -ENOMEM;
2422
2423 q_props = prop->hw_queues_props;
2424
2425 for (i = 0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i++) {
2426 q_props[i].type = QUEUE_TYPE_HW;
2427 q_props[i].driver_only = 0;
2428
2429 if (i >= GAUDI2_QUEUE_ID_NIC_0_0 && i <= GAUDI2_QUEUE_ID_NIC_23_3) {
2430 q_props[i].supports_sync_stream = 0;
2431 } else {
2432 q_props[i].supports_sync_stream = 1;
2433 num_sync_stream_queues++;
2434 }
2435
2436 q_props[i].cb_alloc_flags = CB_ALLOC_USER;
2437
2438 if (gaudi2_is_edma_queue_id(i))
2439 q_props[i].dram_bd = 1;
2440 }
2441
2442 q_props[GAUDI2_QUEUE_ID_CPU_PQ].type = QUEUE_TYPE_CPU;
2443 q_props[GAUDI2_QUEUE_ID_CPU_PQ].driver_only = 1;
2444 q_props[GAUDI2_QUEUE_ID_CPU_PQ].cb_alloc_flags = CB_ALLOC_KERNEL;
2445
2446 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
2447 prop->cfg_base_address = CFG_BASE;
2448 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE_0;
2449 prop->host_base_address = HOST_PHYS_BASE_0;
2450 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE_0;
2451 prop->max_pending_cs = GAUDI2_MAX_PENDING_CS;
2452 prop->completion_queues_count = GAUDI2_RESERVED_CQ_NUMBER;
2453 prop->user_dec_intr_count = NUMBER_OF_DEC;
2454 prop->user_interrupt_count = GAUDI2_IRQ_NUM_USER_LAST - GAUDI2_IRQ_NUM_USER_FIRST + 1;
2455 prop->completion_mode = HL_COMPLETION_MODE_CS;
2456 prop->sync_stream_first_sob = GAUDI2_RESERVED_SOB_NUMBER;
2457 prop->sync_stream_first_mon = GAUDI2_RESERVED_MON_NUMBER;
2458
2459 prop->sram_base_address = SRAM_BASE_ADDR;
2460 prop->sram_size = SRAM_SIZE;
2461 prop->sram_end_address = prop->sram_base_address + prop->sram_size;
2462 prop->sram_user_base_address = prop->sram_base_address + SRAM_USER_BASE_OFFSET;
2463
2464 prop->hints_range_reservation = true;
2465
2466 prop->rotator_enabled_mask = BIT(NUM_OF_ROT) - 1;
2467
2468 prop->max_asid = 2;
2469
2470 prop->dmmu.pgt_size = HMMU_PAGE_TABLES_SIZE;
2471 prop->mmu_pte_size = HL_PTE_SIZE;
2472
2473 prop->dmmu.hop_shifts[MMU_HOP0] = DHOP0_SHIFT;
2474 prop->dmmu.hop_shifts[MMU_HOP1] = DHOP1_SHIFT;
2475 prop->dmmu.hop_shifts[MMU_HOP2] = DHOP2_SHIFT;
2476 prop->dmmu.hop_shifts[MMU_HOP3] = DHOP3_SHIFT;
2477 prop->dmmu.hop_masks[MMU_HOP0] = DHOP0_MASK;
2478 prop->dmmu.hop_masks[MMU_HOP1] = DHOP1_MASK;
2479 prop->dmmu.hop_masks[MMU_HOP2] = DHOP2_MASK;
2480 prop->dmmu.hop_masks[MMU_HOP3] = DHOP3_MASK;
2481 prop->dmmu.page_size = PAGE_SIZE_1GB;
2482 prop->dmmu.num_hops = MMU_ARCH_4_HOPS;
2483 prop->dmmu.last_mask = LAST_MASK;
2484 prop->dmmu.host_resident = 0;
2485 prop->dmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
2486 prop->dmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
2487
2488 /* As we need to set the pgt address in DRAM for HMMU init, we cannot
2489 * wait for the F/W cpucp info to set the DRAM properties, because MMU init
2490 * comes before H/W init
2491 */
2492 rc = hdev->asic_funcs->set_dram_properties(hdev);
2493 if (rc)
2494 goto free_qprops;
2495
2496 prop->mmu_pgt_size = PMMU_PAGE_TABLES_SIZE;
2497
2498 prop->pmmu.pgt_size = prop->mmu_pgt_size;
2499 hdev->pmmu_huge_range = true;
2500 prop->pmmu.host_resident = 1;
2501 prop->pmmu.num_hops = MMU_ARCH_6_HOPS;
2502 prop->pmmu.last_mask = LAST_MASK;
2503 prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
2504 prop->pmmu.hop0_tables_total_size = HOP_TABLE_SIZE_512_PTE * prop->max_asid;
2505
2506 prop->hints_host_reserved_va_range.start_addr = RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START;
2507 prop->hints_host_reserved_va_range.end_addr = RESERVED_VA_RANGE_FOR_ARC_ON_HOST_END;
2508 prop->hints_host_hpage_reserved_va_range.start_addr =
2509 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_START;
2510 prop->hints_host_hpage_reserved_va_range.end_addr =
2511 RESERVED_VA_RANGE_FOR_ARC_ON_HOST_HPAGE_END;
2512
2513 if (PAGE_SIZE == SZ_64K) {
2514 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_64K;
2515 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_64K;
2516 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_64K;
2517 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_64K;
2518 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_64K;
2519 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_64K;
2520 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_64K;
2521 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_64K;
2522 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_64K;
2523 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_64K;
2524 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_64K;
2525 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_64K;
2526 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2527 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2528 prop->pmmu.page_size = PAGE_SIZE_64KB;
2529
2530 /* shifts and masks are the same in PMMU and HPMMU */
2531 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2532 prop->pmmu_huge.page_size = PAGE_SIZE_16MB;
2533 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2534 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2535 } else {
2536 prop->pmmu.hop_shifts[MMU_HOP0] = HOP0_SHIFT_4K;
2537 prop->pmmu.hop_shifts[MMU_HOP1] = HOP1_SHIFT_4K;
2538 prop->pmmu.hop_shifts[MMU_HOP2] = HOP2_SHIFT_4K;
2539 prop->pmmu.hop_shifts[MMU_HOP3] = HOP3_SHIFT_4K;
2540 prop->pmmu.hop_shifts[MMU_HOP4] = HOP4_SHIFT_4K;
2541 prop->pmmu.hop_shifts[MMU_HOP5] = HOP5_SHIFT_4K;
2542 prop->pmmu.hop_masks[MMU_HOP0] = HOP0_MASK_4K;
2543 prop->pmmu.hop_masks[MMU_HOP1] = HOP1_MASK_4K;
2544 prop->pmmu.hop_masks[MMU_HOP2] = HOP2_MASK_4K;
2545 prop->pmmu.hop_masks[MMU_HOP3] = HOP3_MASK_4K;
2546 prop->pmmu.hop_masks[MMU_HOP4] = HOP4_MASK_4K;
2547 prop->pmmu.hop_masks[MMU_HOP5] = HOP5_MASK_4K;
2548 prop->pmmu.start_addr = VA_HOST_SPACE_PAGE_START;
2549 prop->pmmu.end_addr = VA_HOST_SPACE_PAGE_END;
2550 prop->pmmu.page_size = PAGE_SIZE_4KB;
2551
2552 /* shifts and masks are the same in PMMU and HPMMU */
2553 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
2554 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
2555 prop->pmmu_huge.start_addr = VA_HOST_SPACE_HPAGE_START;
2556 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END;
2557 }
2558
2559 prop->max_num_of_engines = GAUDI2_ENGINE_ID_SIZE;
2560 prop->num_engine_cores = CPU_ID_MAX;
2561 prop->cfg_size = CFG_SIZE;
2562 prop->num_of_events = GAUDI2_EVENT_SIZE;
2563
2564 prop->supports_engine_modes = true;
2565
2566 prop->dc_power_default = DC_POWER_DEFAULT;
2567
2568 prop->cb_pool_cb_cnt = GAUDI2_CB_POOL_CB_CNT;
2569 prop->cb_pool_cb_size = GAUDI2_CB_POOL_CB_SIZE;
2570 prop->pcie_dbi_base_address = CFG_BASE + mmPCIE_DBI_BASE;
2571 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
2572
2573 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME, CARD_NAME_MAX_LEN);
2574
2575 prop->mme_master_slave_mode = 1;
2576
2577 prop->first_available_user_sob[0] = GAUDI2_RESERVED_SOB_NUMBER +
2578 (num_sync_stream_queues * HL_RSVD_SOBS);
2579
2580 prop->first_available_user_mon[0] = GAUDI2_RESERVED_MON_NUMBER +
2581 (num_sync_stream_queues * HL_RSVD_MONS);
2582
2583 prop->first_available_user_interrupt = GAUDI2_IRQ_NUM_USER_FIRST;
2584 prop->tpc_interrupt_id = GAUDI2_IRQ_NUM_TPC_ASSERT;
2585 prop->eq_interrupt_id = GAUDI2_IRQ_NUM_EVENT_QUEUE;
2586
2587 prop->first_available_cq[0] = GAUDI2_RESERVED_CQ_NUMBER;
2588
2589 prop->fw_cpu_boot_dev_sts0_valid = false;
2590 prop->fw_cpu_boot_dev_sts1_valid = false;
2591 prop->hard_reset_done_by_fw = false;
2592 prop->gic_interrupts_enable = true;
2593
2594 prop->server_type = HL_SERVER_TYPE_UNKNOWN;
2595
2596 prop->max_dec = NUMBER_OF_DEC;
2597
2598 prop->clk_pll_index = HL_GAUDI2_MME_PLL;
2599
2600 prop->dma_mask = 64;
2601
2602 prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0;
2603
2604 prop->supports_advanced_cpucp_rc = true;
2605
2606 return 0;
2607
2608 free_qprops:
2609 kfree(prop->hw_queues_props);
2610 return rc;
2611 }
2612
2613 static int gaudi2_pci_bars_map(struct hl_device *hdev)
2614 {
2615 static const char * const name[] = {"CFG_SRAM", "MSIX", "DRAM"};
2616 bool is_wc[3] = {false, false, true};
2617 int rc;
2618
2619 rc = hl_pci_bars_map(hdev, name, is_wc);
2620 if (rc)
2621 return rc;
2622
2623 hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] + (CFG_BASE - STM_FLASH_BASE_ADDR);
2624
2625 return 0;
2626 }
2627
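/*
 * Usage sketch (illustrative, not taken from a specific caller): the function returns
 * the previous BAR base so a caller can temporarily window the DRAM BAR over an
 * arbitrary address and restore it afterwards, e.g.:
 *
 *	old_base = gaudi2_set_hbm_bar_base(hdev, bar_aligned_addr);
 *	...access the data through the DRAM BAR...
 *	gaudi2_set_hbm_bar_base(hdev, old_base);
 */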
2628 static u64 gaudi2_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
2629 {
2630 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2631 struct hl_inbound_pci_region pci_region;
2632 u64 old_addr = addr;
2633 int rc;
2634
2635 if ((gaudi2) && (gaudi2->dram_bar_cur_addr == addr))
2636 return old_addr;
2637
2638 if (hdev->asic_prop.iatu_done_by_fw)
2639 return U64_MAX;
2640
2641 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2642 pci_region.mode = PCI_BAR_MATCH_MODE;
2643 pci_region.bar = DRAM_BAR_ID;
2644 pci_region.addr = addr;
2645 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
2646 if (rc)
2647 return U64_MAX;
2648
2649 if (gaudi2) {
2650 old_addr = gaudi2->dram_bar_cur_addr;
2651 gaudi2->dram_bar_cur_addr = addr;
2652 }
2653
2654 return old_addr;
2655 }
2656
2657 static int gaudi2_init_iatu(struct hl_device *hdev)
2658 {
2659 struct hl_inbound_pci_region inbound_region;
2660 struct hl_outbound_pci_region outbound_region;
2661 u32 bar_addr_low, bar_addr_high;
2662 int rc;
2663
2664 if (hdev->asic_prop.iatu_done_by_fw)
2665 return 0;
2666
2667 /* Temporary inbound Region 0 - Bar 0 - Point to CFG
2668 * We must map this region in BAR match mode in order to
2669 * fetch BAR physical base address
2670 */
2671 inbound_region.mode = PCI_BAR_MATCH_MODE;
2672 inbound_region.bar = SRAM_CFG_BAR_ID;
2673 /* Base address must be aligned to Bar size which is 256 MB */
2674 inbound_region.addr = STM_FLASH_BASE_ADDR - STM_FLASH_ALIGNED_OFF;
2675 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2676 if (rc)
2677 return rc;
2678
2679 /* Fetch physical BAR address */
2680 bar_addr_high = RREG32(mmPCIE_DBI_BAR1_REG + STM_FLASH_ALIGNED_OFF);
2681 bar_addr_low = RREG32(mmPCIE_DBI_BAR0_REG + STM_FLASH_ALIGNED_OFF) & ~0xF;
2682
2683 hdev->pcie_bar_phys[SRAM_CFG_BAR_ID] = (u64)bar_addr_high << 32 | bar_addr_low;
2684
2685 /* Inbound Region 0 - Bar 0 - Point to CFG */
2686 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2687 inbound_region.bar = SRAM_CFG_BAR_ID;
2688 inbound_region.offset_in_bar = 0;
2689 inbound_region.addr = STM_FLASH_BASE_ADDR;
2690 inbound_region.size = CFG_REGION_SIZE;
2691 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
2692 if (rc)
2693 return rc;
2694
2695 /* Inbound Region 1 - Bar 0 - Point to BAR0_RESERVED + SRAM */
2696 inbound_region.mode = PCI_ADDRESS_MATCH_MODE;
2697 inbound_region.bar = SRAM_CFG_BAR_ID;
2698 inbound_region.offset_in_bar = CFG_REGION_SIZE;
2699 inbound_region.addr = BAR0_RSRVD_BASE_ADDR;
2700 inbound_region.size = BAR0_RSRVD_SIZE + SRAM_SIZE;
2701 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
2702 if (rc)
2703 return rc;
2704
2705 /* Inbound Region 2 - Bar 4 - Point to DRAM */
2706 inbound_region.mode = PCI_BAR_MATCH_MODE;
2707 inbound_region.bar = DRAM_BAR_ID;
2708 inbound_region.addr = DRAM_PHYS_BASE;
2709 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
2710 if (rc)
2711 return rc;
2712
2713 /* Outbound Region 0 - Point to Host */
2714 outbound_region.addr = HOST_PHYS_BASE_0;
2715 outbound_region.size = HOST_PHYS_SIZE_0;
2716 rc = hl_pci_set_outbound_region(hdev, &outbound_region);
2717
2718 return rc;
2719 }
2720
2721 static enum hl_device_hw_state gaudi2_get_hw_state(struct hl_device *hdev)
2722 {
2723 return RREG32(mmHW_STATE);
2724 }
2725
2726 static int gaudi2_tpc_binning_init_prop(struct hl_device *hdev)
2727 {
2728 struct asic_fixed_properties *prop = &hdev->asic_prop;
2729
2730 /*
2731 * check for error condition in which number of binning candidates
2732 * is higher than the maximum supported by the driver
2733 */
2734 if (hweight64(hdev->tpc_binning) > MAX_CLUSTER_BINNING_FAULTY_TPCS) {
2735 dev_err(hdev->dev, "TPC binning is supported for max of %d faulty TPCs, provided mask 0x%llx\n",
2736 MAX_CLUSTER_BINNING_FAULTY_TPCS,
2737 hdev->tpc_binning);
2738 return -EINVAL;
2739 }
2740
2741 prop->tpc_binning_mask = hdev->tpc_binning;
2742 prop->tpc_enabled_mask = GAUDI2_TPC_FULL_MASK;
2743
2744 return 0;
2745 }
2746
2747 static int gaudi2_set_tpc_binning_masks(struct hl_device *hdev)
2748 {
2749 struct asic_fixed_properties *prop = &hdev->asic_prop;
2750 struct hw_queue_properties *q_props = prop->hw_queues_props;
2751 u64 tpc_binning_mask;
2752 u8 subst_idx = 0;
2753 int i, rc;
2754
2755 rc = gaudi2_tpc_binning_init_prop(hdev);
2756 if (rc)
2757 return rc;
2758
2759 tpc_binning_mask = prop->tpc_binning_mask;
2760
2761 for (i = 0 ; i < MAX_FAULTY_TPCS ; i++) {
2762 u8 subst_seq, binned, qid_base;
2763
2764 if (tpc_binning_mask == 0)
2765 break;
2766
2767 if (subst_idx == 0) {
2768 subst_seq = TPC_ID_DCORE0_TPC6;
2769 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
2770 } else {
2771 subst_seq = TPC_ID_DCORE3_TPC5;
2772 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_5_0;
2773 }
2774
2775
2776 /* clear bit from mask */
2777 binned = __ffs(tpc_binning_mask);
2778 /*
2779 * Coverity complains about possible out-of-bound access in
2780 * clear_bit
2781 */
2782 if (binned >= TPC_ID_SIZE) {
2783 dev_err(hdev->dev,
2784 "Invalid binned TPC (binning mask: %llx)\n",
2785 tpc_binning_mask);
2786 return -EINVAL;
2787 }
2788 clear_bit(binned, (unsigned long *)&tpc_binning_mask);
2789
2790 /* also clear replacing TPC bit from enabled mask */
2791 clear_bit(subst_seq, (unsigned long *)&prop->tpc_enabled_mask);
2792
2793 /* bin the substitute TPC's queues */
2794 q_props[qid_base].binned = 1;
2795 q_props[qid_base + 1].binned = 1;
2796 q_props[qid_base + 2].binned = 1;
2797 q_props[qid_base + 3].binned = 1;
2798
2799 subst_idx++;
2800 }
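/*
 * Illustrative example: if exactly one TPC bit is set in the binning mask, the first
 * substitute (DCORE0_TPC6) is cleared from tpc_enabled_mask and its four queues
 * (qid_base..qid_base + 3) are marked as binned.
 */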
2801
2802 return 0;
2803 }
2804
2805 static int gaudi2_set_dec_binning_masks(struct hl_device *hdev)
2806 {
2807 struct asic_fixed_properties *prop = &hdev->asic_prop;
2808 u8 num_faulty;
2809
2810 num_faulty = hweight32(hdev->decoder_binning);
2811
2812 /*
2813 * check for error condition in which number of binning candidates
2814 * is higher than the maximum supported by the driver
2815 */
2816 if (num_faulty > MAX_FAULTY_DECODERS) {
2817 dev_err(hdev->dev, "decoder binning is supported for max of single faulty decoder, provided mask 0x%x\n",
2818 hdev->decoder_binning);
2819 return -EINVAL;
2820 }
2821
2822 prop->decoder_binning_mask = (hdev->decoder_binning & GAUDI2_DECODER_FULL_MASK);
2823
2824 if (prop->decoder_binning_mask)
2825 prop->decoder_enabled_mask = (GAUDI2_DECODER_FULL_MASK & ~BIT(DEC_ID_PCIE_VDEC1));
2826 else
2827 prop->decoder_enabled_mask = GAUDI2_DECODER_FULL_MASK;
2828
2829 return 0;
2830 }
2831
2832 static void gaudi2_set_dram_binning_masks(struct hl_device *hdev)
2833 {
2834 struct asic_fixed_properties *prop = &hdev->asic_prop;
2835
2836 /* check if we should override default binning */
2837 if (!hdev->dram_binning) {
2838 prop->dram_binning_mask = 0;
2839 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK;
2840 return;
2841 }
2842
2843 /* set DRAM binning constraints */
2844 prop->faulty_dram_cluster_map |= hdev->dram_binning;
2845 prop->dram_binning_mask = hdev->dram_binning;
2846 prop->dram_enabled_mask = GAUDI2_DRAM_FULL_MASK & ~BIT(HBM_ID5);
2847 }
2848
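/*
 * Illustrative summary (editorial): when a single EDMA is binned, its HBM cluster
 * (edma_to_hbm_cluster[seq]) is added to faulty_dram_cluster_map and the substitute
 * engine EDMA_ID_DCORE3_INSTANCE1, together with its four queues, is taken out of
 * service.
 */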
2849 static int gaudi2_set_edma_binning_masks(struct hl_device *hdev)
2850 {
2851 struct asic_fixed_properties *prop = &hdev->asic_prop;
2852 struct hw_queue_properties *q_props;
2853 u8 seq, num_faulty;
2854
2855 num_faulty = hweight32(hdev->edma_binning);
2856
2857 /*
2858 * check for error condition in which number of binning candidates
2859 * is higher than the maximum supported by the driver
2860 */
2861 if (num_faulty > MAX_FAULTY_EDMAS) {
2862 dev_err(hdev->dev,
2863 "EDMA binning is supported for max of single faulty EDMA, provided mask 0x%x\n",
2864 hdev->edma_binning);
2865 return -EINVAL;
2866 }
2867
2868 if (!hdev->edma_binning) {
2869 prop->edma_binning_mask = 0;
2870 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK;
2871 return 0;
2872 }
2873
2874 seq = __ffs((unsigned long)hdev->edma_binning);
2875
2876 /* set binning constraints */
2877 prop->faulty_dram_cluster_map |= BIT(edma_to_hbm_cluster[seq]);
2878 prop->edma_binning_mask = hdev->edma_binning;
2879 prop->edma_enabled_mask = GAUDI2_EDMA_FULL_MASK & ~BIT(EDMA_ID_DCORE3_INSTANCE1);
2880
2881 /* bin substitute EDMA's queue */
2882 q_props = prop->hw_queues_props;
2883 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0].binned = 1;
2884 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1].binned = 1;
2885 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2].binned = 1;
2886 q_props[GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3].binned = 1;
2887
2888 return 0;
2889 }
2890
2891 static int gaudi2_set_xbar_edge_enable_mask(struct hl_device *hdev, u32 xbar_edge_iso_mask)
2892 {
2893 struct asic_fixed_properties *prop = &hdev->asic_prop;
2894 u8 num_faulty, seq;
2895
2896 /* check if we should override default binning */
2897 if (!xbar_edge_iso_mask) {
2898 prop->xbar_edge_enabled_mask = GAUDI2_XBAR_EDGE_FULL_MASK;
2899 return 0;
2900 }
2901
2902 /*
2903 * note that it can be set to a value other than 0 only after the cpucp packet (i.e.
2904 * only the F/W can set a redundancy value). for the user it will always be 0.
2905 */
2906 num_faulty = hweight32(xbar_edge_iso_mask);
2907
2908 /*
2909 * check for error condition in which number of binning candidates
2910 * is higher than the maximum supported by the driver
2911 */
2912 if (num_faulty > MAX_FAULTY_XBARS) {
2913 dev_err(hdev->dev, "we cannot have more than %d faulty XBAR EDGE\n",
2914 MAX_FAULTY_XBARS);
2915 return -EINVAL;
2916 }
2917
2918 seq = __ffs((unsigned long)xbar_edge_iso_mask);
2919
2920 /* set binning constraints */
2921 prop->faulty_dram_cluster_map |= BIT(xbar_edge_to_hbm_cluster[seq]);
2922 prop->xbar_edge_enabled_mask = (~xbar_edge_iso_mask) & GAUDI2_XBAR_EDGE_FULL_MASK;
2923
2924 return 0;
2925 }
2926
2927 static int gaudi2_set_cluster_binning_masks_common(struct hl_device *hdev, u8 xbar_edge_iso_mask)
2928 {
2929 int rc;
2930
2931 /*
2932 * mark all clusters as good; each component will "fail" a cluster
2933 * based on eFuse/user values.
2934 * If more than a single cluster is faulty, the chip is unusable
2935 */
2936 hdev->asic_prop.faulty_dram_cluster_map = 0;
2937
2938 gaudi2_set_dram_binning_masks(hdev);
2939
2940 rc = gaudi2_set_edma_binning_masks(hdev);
2941 if (rc)
2942 return rc;
2943
2944 rc = gaudi2_set_xbar_edge_enable_mask(hdev, xbar_edge_iso_mask);
2945 if (rc)
2946 return rc;
2947
2948
2949 /* always initially set to full mask */
2950 hdev->asic_prop.hmmu_hif_enabled_mask = GAUDI2_HIF_HMMU_FULL_MASK;
2951
2952 return 0;
2953 }
2954
2955 static int gaudi2_set_cluster_binning_masks(struct hl_device *hdev)
2956 {
2957 struct asic_fixed_properties *prop = &hdev->asic_prop;
2958 int rc;
2959
2960 rc = gaudi2_set_cluster_binning_masks_common(hdev, prop->cpucp_info.xbar_binning_mask);
2961 if (rc)
2962 return rc;
2963
2964 /* if we have DRAM binning reported by FW we should perform cluster config */
2965 if (prop->faulty_dram_cluster_map) {
2966 u8 cluster_seq = __ffs((unsigned long)prop->faulty_dram_cluster_map);
2967
2968 prop->hmmu_hif_enabled_mask = cluster_hmmu_hif_enabled_mask[cluster_seq];
2969 }
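/*
 * For example, a faulty HBM cluster reported by the F/W selects the matching entry in
 * cluster_hmmu_hif_enabled_mask[], which overrides the full HIF/HMMU mask set by the
 * common path above.
 */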
2970
2971 return 0;
2972 }
2973
2974 static int gaudi2_set_binning_masks(struct hl_device *hdev)
2975 {
2976 int rc;
2977
2978 rc = gaudi2_set_cluster_binning_masks(hdev);
2979 if (rc)
2980 return rc;
2981
2982 rc = gaudi2_set_tpc_binning_masks(hdev);
2983 if (rc)
2984 return rc;
2985
2986 rc = gaudi2_set_dec_binning_masks(hdev);
2987 if (rc)
2988 return rc;
2989
2990 return 0;
2991 }
2992
2993 static int gaudi2_cpucp_info_get(struct hl_device *hdev)
2994 {
2995 struct gaudi2_device *gaudi2 = hdev->asic_specific;
2996 struct asic_fixed_properties *prop = &hdev->asic_prop;
2997 long max_power;
2998 u64 dram_size;
2999 int rc;
3000
3001 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3002 return 0;
3003
3004 /* There is no point in asking for this information again when not doing a hard reset,
3005 * as the device CPU hasn't been reset
3006 */
3007 if (hdev->reset_info.in_compute_reset)
3008 return 0;
3009
3010 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
3011 mmCPU_BOOT_ERR1);
3012 if (rc)
3013 return rc;
3014
3015 dram_size = le64_to_cpu(prop->cpucp_info.dram_size);
3016 if (dram_size) {
3017 /* we can have either 5 or 6 HBMs. other values are invalid */
3018
3019 if ((dram_size != ((GAUDI2_HBM_NUM - 1) * SZ_16G)) &&
3020 (dram_size != (GAUDI2_HBM_NUM * SZ_16G))) {
3021 dev_err(hdev->dev,
3022 "F/W reported invalid DRAM size %llu. Trying to use default size %llu\n",
3023 dram_size, prop->dram_size);
3024 dram_size = prop->dram_size;
3025 }
3026
3027 prop->dram_size = dram_size;
3028 prop->dram_end_address = prop->dram_base_address + dram_size;
3029 }
3030
3031 if (!strlen(prop->cpucp_info.card_name))
3032 strscpy_pad(prop->cpucp_info.card_name, GAUDI2_DEFAULT_CARD_NAME,
3033 CARD_NAME_MAX_LEN);
3034
3035 /* Overwrite binning masks with the actual binning values from F/W */
3036 hdev->dram_binning = prop->cpucp_info.dram_binning_mask;
3037 hdev->edma_binning = prop->cpucp_info.edma_binning_mask;
3038 hdev->tpc_binning = le64_to_cpu(prop->cpucp_info.tpc_binning_mask);
3039 hdev->decoder_binning = lower_32_bits(le64_to_cpu(prop->cpucp_info.decoder_binning_mask));
3040
3041 dev_dbg(hdev->dev, "Read binning masks: tpc: 0x%llx, dram: 0x%llx, edma: 0x%x, dec: 0x%x\n",
3042 hdev->tpc_binning, hdev->dram_binning, hdev->edma_binning,
3043 hdev->decoder_binning);
3044
3045 /*
3046 * at this point the DRAM parameters need to be updated according to data obtained
3047 * from the FW
3048 */
3049 rc = hdev->asic_funcs->set_dram_properties(hdev);
3050 if (rc)
3051 return rc;
3052
3053 rc = hdev->asic_funcs->set_binning_masks(hdev);
3054 if (rc)
3055 return rc;
3056
3057 max_power = hl_fw_get_max_power(hdev);
3058 if (max_power < 0)
3059 return max_power;
3060
3061 prop->max_power_default = (u64) max_power;
3062
3063 return 0;
3064 }
3065
3066 static int gaudi2_fetch_psoc_frequency(struct hl_device *hdev)
3067 {
3068 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3069 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS];
3070 int rc;
3071
3072 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
3073 return 0;
3074
3075 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI2_CPU_PLL, pll_freq_arr);
3076 if (rc)
3077 return rc;
3078
3079 hdev->asic_prop.psoc_timestamp_frequency = pll_freq_arr[3];
3080
3081 return 0;
3082 }
3083
3084 static int gaudi2_mmu_clear_pgt_range(struct hl_device *hdev)
3085 {
3086 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3087 struct asic_fixed_properties *prop = &hdev->asic_prop;
3088 int rc;
3089
3090 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
3091 return 0;
3092
3093 if (prop->dmmu.host_resident)
3094 return 0;
3095
3096 rc = gaudi2_memset_device_memory(hdev, prop->mmu_pgt_addr, prop->dmmu.pgt_size, 0);
3097 if (rc)
3098 dev_err(hdev->dev, "Failed to clear mmu pgt\n");
3099
3100 return rc;
3101 }
3102
3103 static int gaudi2_early_init(struct hl_device *hdev)
3104 {
3105 struct asic_fixed_properties *prop = &hdev->asic_prop;
3106 struct pci_dev *pdev = hdev->pdev;
3107 resource_size_t pci_bar_size;
3108 int rc;
3109
3110 rc = gaudi2_set_fixed_properties(hdev);
3111 if (rc)
3112 return rc;
3113
3114 /* Check BAR sizes */
3115 pci_bar_size = pci_resource_len(pdev, SRAM_CFG_BAR_ID);
3116
3117 if (pci_bar_size != CFG_BAR_SIZE) {
3118 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3119 SRAM_CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
3120 rc = -ENODEV;
3121 goto free_queue_props;
3122 }
3123
3124 pci_bar_size = pci_resource_len(pdev, MSIX_BAR_ID);
3125 if (pci_bar_size != MSIX_BAR_SIZE) {
3126 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
3127 MSIX_BAR_ID, &pci_bar_size, MSIX_BAR_SIZE);
3128 rc = -ENODEV;
3129 goto free_queue_props;
3130 }
3131
3132 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID);
3133 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID);
3134
3135 /*
3136 * Only in pldm does the driver configure the iATU
3137 */
3138 if (hdev->pldm)
3139 hdev->asic_prop.iatu_done_by_fw = false;
3140 else
3141 hdev->asic_prop.iatu_done_by_fw = true;
3142
3143 rc = hl_pci_init(hdev);
3144 if (rc)
3145 goto free_queue_props;
3146
3147 /* Before continuing in the initialization, we need to read the preboot
3148 * version to determine whether we run with a security-enabled firmware
3149 */
3150 rc = hl_fw_read_preboot_status(hdev);
3151 if (rc) {
3152 if (hdev->reset_on_preboot_fail)
3153 /* we are already on failure flow, so don't check if hw_fini fails. */
3154 hdev->asic_funcs->hw_fini(hdev, true, false);
3155 goto pci_fini;
3156 }
3157
3158 if (gaudi2_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
3159 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
3160 rc = hdev->asic_funcs->hw_fini(hdev, true, false);
3161 if (rc) {
3162 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
3163 goto pci_fini;
3164 }
3165 }
3166
3167 return 0;
3168
3169 pci_fini:
3170 hl_pci_fini(hdev);
3171 free_queue_props:
3172 kfree(hdev->asic_prop.hw_queues_props);
3173 return rc;
3174 }
3175
3176 static int gaudi2_early_fini(struct hl_device *hdev)
3177 {
3178 kfree(hdev->asic_prop.hw_queues_props);
3179 hl_pci_fini(hdev);
3180
3181 return 0;
3182 }
3183
3184 static bool gaudi2_is_arc_nic_owned(u64 arc_id)
3185 {
3186 switch (arc_id) {
3187 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
3188 return true;
3189 default:
3190 return false;
3191 }
3192 }
3193
3194 static bool gaudi2_is_arc_tpc_owned(u64 arc_id)
3195 {
3196 switch (arc_id) {
3197 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
3198 return true;
3199 default:
3200 return false;
3201 }
3202 }
3203
3204 static void gaudi2_init_arcs(struct hl_device *hdev)
3205 {
3206 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
3207 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3208 u64 arc_id;
3209 u32 i;
3210
3211 for (i = CPU_ID_SCHED_ARC0 ; i <= CPU_ID_SCHED_ARC3 ; i++) {
3212 if (gaudi2_is_arc_enabled(hdev, i))
3213 continue;
3214
3215 gaudi2_set_arc_id_cap(hdev, i);
3216 }
3217
3218 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
3219 if (!gaudi2_is_queue_enabled(hdev, i))
3220 continue;
3221
3222 arc_id = gaudi2_queue_id_to_arc_id[i];
3223 if (gaudi2_is_arc_enabled(hdev, arc_id))
3224 continue;
3225
3226 if (gaudi2_is_arc_nic_owned(arc_id) &&
3227 !(hdev->nic_ports_mask & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0)))
3228 continue;
3229
3230 if (gaudi2_is_arc_tpc_owned(arc_id) && !(gaudi2->tpc_hw_cap_initialized &
3231 BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0)))
3232 continue;
3233
3234 gaudi2_set_arc_id_cap(hdev, arc_id);
3235 }
3236
3237 /* Fetch ARC scratchpad address */
3238 hdev->asic_prop.engine_core_interrupt_reg_addr =
3239 CFG_BASE + le32_to_cpu(dyn_regs->eng_arc_irq_ctrl);
3240 }
3241
3242 static int gaudi2_scrub_arc_dccm(struct hl_device *hdev, u32 cpu_id)
3243 {
3244 u32 reg_base, reg_val;
3245 int rc;
3246
3247 switch (cpu_id) {
3248 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC3:
3249 /* Each ARC scheduler has 2 consecutive DCCM blocks */
3250 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3251 ARC_DCCM_BLOCK_SIZE * 2, true);
3252 if (rc)
3253 return rc;
3254 break;
3255 case CPU_ID_SCHED_ARC4:
3256 case CPU_ID_SCHED_ARC5:
3257 case CPU_ID_MME_QMAN_ARC0:
3258 case CPU_ID_MME_QMAN_ARC1:
3259 reg_base = gaudi2_arc_blocks_bases[cpu_id];
3260
3261 /* Scrub lower DCCM block */
3262 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3263 ARC_DCCM_BLOCK_SIZE, true);
3264 if (rc)
3265 return rc;
3266
3267 /* Switch to upper DCCM block */
3268 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 1);
3269 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3270
3271 /* Scrub upper DCCM block */
3272 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3273 ARC_DCCM_BLOCK_SIZE, true);
3274 if (rc)
3275 return rc;
3276
3277 /* Switch to lower DCCM block */
3278 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_MME_ARC_UPPER_DCCM_EN_VAL_MASK, 0);
3279 WREG32(reg_base + ARC_DCCM_UPPER_EN_OFFSET, reg_val);
3280 break;
3281 default:
3282 rc = gaudi2_send_job_to_kdma(hdev, 0, CFG_BASE + gaudi2_arc_dccm_bases[cpu_id],
3283 ARC_DCCM_BLOCK_SIZE, true);
3284 if (rc)
3285 return rc;
3286 }
3287
3288 return 0;
3289 }
3290
3291 static int gaudi2_scrub_arcs_dccm(struct hl_device *hdev)
3292 {
3293 u16 arc_id;
3294 int rc;
3295
3296 for (arc_id = CPU_ID_SCHED_ARC0 ; arc_id < CPU_ID_MAX ; arc_id++) {
3297 if (!gaudi2_is_arc_enabled(hdev, arc_id))
3298 continue;
3299
3300 rc = gaudi2_scrub_arc_dccm(hdev, arc_id);
3301 if (rc)
3302 return rc;
3303 }
3304
3305 return 0;
3306 }
3307
3308 static int gaudi2_late_init(struct hl_device *hdev)
3309 {
3310 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3311 int rc;
3312
3313 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS,
3314 gaudi2->virt_msix_db_dma_addr);
3315 if (rc)
3316 return rc;
3317
3318 rc = gaudi2_fetch_psoc_frequency(hdev);
3319 if (rc) {
3320 dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
3321 goto disable_pci_access;
3322 }
3323
3324 rc = gaudi2_mmu_clear_pgt_range(hdev);
3325 if (rc) {
3326 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
3327 goto disable_pci_access;
3328 }
3329
3330 gaudi2_init_arcs(hdev);
3331
3332 rc = gaudi2_scrub_arcs_dccm(hdev);
3333 if (rc) {
3334 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
3335 goto disable_pci_access;
3336 }
3337
3338 gaudi2_init_security(hdev);
3339
3340 return 0;
3341
3342 disable_pci_access:
3343 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
3344
3345 return rc;
3346 }
3347
3348 static void gaudi2_late_fini(struct hl_device *hdev)
3349 {
3350 hl_hwmon_release_resources(hdev);
3351 }
3352
3353 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx)
3354 {
3355 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3356
3357 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3358 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE0_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3359 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3360 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE1_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3361 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3362 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE2_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3363 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3364 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmDCORE3_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3365 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx++], mmPCIE_DEC0_CMD_BASE, HL_BLOCK_SIZE);
3366 HL_USR_MAPPED_BLK_INIT(&blocks[start_idx], mmPCIE_DEC1_CMD_BASE, HL_BLOCK_SIZE);
3367 }
3368
3369 static void gaudi2_user_mapped_blocks_init(struct hl_device *hdev)
3370 {
3371 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3372 struct user_mapped_block *blocks = gaudi2->mapped_blocks;
3373 u32 block_size, umr_start_idx, num_umr_blocks;
3374 int i;
3375
3376 for (i = 0 ; i < NUM_ARC_CPUS ; i++) {
3377 if (i >= CPU_ID_SCHED_ARC0 && i <= CPU_ID_SCHED_ARC3)
3378 block_size = ARC_DCCM_BLOCK_SIZE * 2;
3379 else
3380 block_size = ARC_DCCM_BLOCK_SIZE;
3381
3382 blocks[i].address = gaudi2_arc_dccm_bases[i];
3383 blocks[i].size = block_size;
3384 }
3385
3386 blocks[NUM_ARC_CPUS].address = mmARC_FARM_ARC0_ACP_ENG_BASE;
3387 blocks[NUM_ARC_CPUS].size = HL_BLOCK_SIZE;
3388
3389 blocks[NUM_ARC_CPUS + 1].address = mmARC_FARM_ARC1_ACP_ENG_BASE;
3390 blocks[NUM_ARC_CPUS + 1].size = HL_BLOCK_SIZE;
3391
3392 blocks[NUM_ARC_CPUS + 2].address = mmARC_FARM_ARC2_ACP_ENG_BASE;
3393 blocks[NUM_ARC_CPUS + 2].size = HL_BLOCK_SIZE;
3394
3395 blocks[NUM_ARC_CPUS + 3].address = mmARC_FARM_ARC3_ACP_ENG_BASE;
3396 blocks[NUM_ARC_CPUS + 3].size = HL_BLOCK_SIZE;
3397
3398 blocks[NUM_ARC_CPUS + 4].address = mmDCORE0_MME_QM_ARC_ACP_ENG_BASE;
3399 blocks[NUM_ARC_CPUS + 4].size = HL_BLOCK_SIZE;
3400
3401 blocks[NUM_ARC_CPUS + 5].address = mmDCORE1_MME_QM_ARC_ACP_ENG_BASE;
3402 blocks[NUM_ARC_CPUS + 5].size = HL_BLOCK_SIZE;
3403
3404 blocks[NUM_ARC_CPUS + 6].address = mmDCORE2_MME_QM_ARC_ACP_ENG_BASE;
3405 blocks[NUM_ARC_CPUS + 6].size = HL_BLOCK_SIZE;
3406
3407 blocks[NUM_ARC_CPUS + 7].address = mmDCORE3_MME_QM_ARC_ACP_ENG_BASE;
3408 blocks[NUM_ARC_CPUS + 7].size = HL_BLOCK_SIZE;
3409
3410 umr_start_idx = NUM_ARC_CPUS + NUM_OF_USER_ACP_BLOCKS;
3411 num_umr_blocks = NIC_NUMBER_OF_ENGINES * NUM_OF_USER_NIC_UMR_BLOCKS;
3412 for (i = 0 ; i < num_umr_blocks ; i++) {
3413 u8 nic_id, umr_block_id;
3414
3415 nic_id = i / NUM_OF_USER_NIC_UMR_BLOCKS;
3416 umr_block_id = i % NUM_OF_USER_NIC_UMR_BLOCKS;
3417
3418 blocks[umr_start_idx + i].address =
3419 mmNIC0_UMR0_0_UNSECURE_DOORBELL0_BASE +
3420 (nic_id / NIC_NUMBER_OF_QM_PER_MACRO) * NIC_OFFSET +
3421 (nic_id % NIC_NUMBER_OF_QM_PER_MACRO) * NIC_QM_OFFSET +
3422 umr_block_id * NIC_UMR_OFFSET;
3423 blocks[umr_start_idx + i].size = HL_BLOCK_SIZE;
3424 }
3425
3426 /* Expose decoder HW configuration block to user */
3427 gaudi2_user_mapped_dec_init(gaudi2, USR_MAPPED_BLK_DEC_START_IDX);
3428
3429 for (i = 1; i < NUM_OF_DCORES; ++i) {
3430 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].size = SM_OBJS_BLOCK_SIZE;
3431 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].size = HL_BLOCK_SIZE;
3432
3433 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1)].address =
3434 mmDCORE0_SYNC_MNGR_OBJS_BASE + i * DCORE_OFFSET;
3435
3436 blocks[USR_MAPPED_BLK_SM_START_IDX + 2 * (i - 1) + 1].address =
3437 mmDCORE0_SYNC_MNGR_GLBL_BASE + i * DCORE_OFFSET;
3438 }
3439 }
3440
3441 static int gaudi2_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
3442 {
3443 dma_addr_t dma_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
3444 void *virt_addr_arr[GAUDI2_ALLOC_CPU_MEM_RETRY_CNT] = {};
3445 int i, j, rc = 0;
3446
3447 /* The device ARC works with 32-bit addresses, and because there is a single HW register
3448 * that holds the extension bits (49..28), these bits must be identical across the entire
3449 * allocated range.
3450 */
3451
3452 for (i = 0 ; i < GAUDI2_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
3453 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
3454 &dma_addr_arr[i], GFP_KERNEL | __GFP_ZERO);
3455 if (!virt_addr_arr[i]) {
3456 rc = -ENOMEM;
3457 goto free_dma_mem_arr;
3458 }
3459
3460 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
3461 if (GAUDI2_ARC_PCI_MSB_ADDR(dma_addr_arr[i]) == GAUDI2_ARC_PCI_MSB_ADDR(end_addr))
3462 break;
3463 }
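/*
 * Illustrative note: since bits (27..0) cover a 256MB window, an allocation that
 * straddles a 256MB-aligned boundary has different extension bits (49..28) at its start
 * and end; such buffers are kept aside, the allocation is retried up to
 * GAUDI2_ALLOC_CPU_MEM_RETRY_CNT times, and the unsuitable buffers are freed below.
 */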
3464
3465 if (i == GAUDI2_ALLOC_CPU_MEM_RETRY_CNT) {
3466 dev_err(hdev->dev,
3467 "MSB of ARC accessible DMA memory are not identical in all range\n");
3468 rc = -EFAULT;
3469 goto free_dma_mem_arr;
3470 }
3471
3472 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
3473 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
3474
3475 free_dma_mem_arr:
3476 for (j = 0 ; j < i ; j++)
3477 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j],
3478 dma_addr_arr[j]);
3479
3480 return rc;
3481 }
3482
3483 static void gaudi2_set_pci_memory_regions(struct hl_device *hdev)
3484 {
3485 struct asic_fixed_properties *prop = &hdev->asic_prop;
3486 struct pci_mem_region *region;
3487
3488 /* CFG */
3489 region = &hdev->pci_mem_region[PCI_REGION_CFG];
3490 region->region_base = CFG_BASE;
3491 region->region_size = CFG_SIZE;
3492 region->offset_in_bar = CFG_BASE - STM_FLASH_BASE_ADDR;
3493 region->bar_size = CFG_BAR_SIZE;
3494 region->bar_id = SRAM_CFG_BAR_ID;
3495 region->used = 1;
3496
3497 /* SRAM */
3498 region = &hdev->pci_mem_region[PCI_REGION_SRAM];
3499 region->region_base = SRAM_BASE_ADDR;
3500 region->region_size = SRAM_SIZE;
3501 region->offset_in_bar = CFG_REGION_SIZE + BAR0_RSRVD_SIZE;
3502 region->bar_size = CFG_BAR_SIZE;
3503 region->bar_id = SRAM_CFG_BAR_ID;
3504 region->used = 1;
3505
3506 /* DRAM */
3507 region = &hdev->pci_mem_region[PCI_REGION_DRAM];
3508 region->region_base = DRAM_PHYS_BASE;
3509 region->region_size = hdev->asic_prop.dram_size;
3510 region->offset_in_bar = 0;
3511 region->bar_size = prop->dram_pci_bar_size;
3512 region->bar_id = DRAM_BAR_ID;
3513 region->used = 1;
3514 }
3515
3516 static void gaudi2_user_interrupt_setup(struct hl_device *hdev)
3517 {
3518 struct asic_fixed_properties *prop = &hdev->asic_prop;
3519 int i, j, k;
3520
3521 /* Initialize TPC interrupt */
3522 HL_USR_INTR_STRUCT_INIT(hdev->tpc_interrupt, hdev, 0, HL_USR_INTERRUPT_TPC);
3523
3524 /* Initialize unexpected error interrupt */
3525 HL_USR_INTR_STRUCT_INIT(hdev->unexpected_error_interrupt, hdev, 0,
3526 HL_USR_INTERRUPT_UNEXPECTED);
3527
3528 /* Initialize common user CQ interrupt */
3529 HL_USR_INTR_STRUCT_INIT(hdev->common_user_cq_interrupt, hdev,
3530 HL_COMMON_USER_CQ_INTERRUPT_ID, HL_USR_INTERRUPT_CQ);
3531
3532 /* Initialize common decoder interrupt */
3533 HL_USR_INTR_STRUCT_INIT(hdev->common_decoder_interrupt, hdev,
3534 HL_COMMON_DEC_INTERRUPT_ID, HL_USR_INTERRUPT_DECODER);
3535
3536 /* User interrupts structure holds both decoder and user interrupts from various engines.
3537 * We first initialize the decoder interrupts and then we add the user interrupts.
3538 * The only limitation is that the last decoder interrupt id must be smaller
3539 * than GAUDI2_IRQ_NUM_USER_FIRST. This is checked at compilation time.
3540 */
3541
3542 /* Initialize decoder interrupts; expose only the normal interrupts,
3543 * as error interrupts are handled by the driver
3544 */
3545 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, j = 0 ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_NRM;
3546 i += 2, j++)
3547 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i,
3548 HL_USR_INTERRUPT_DECODER);
3549
3550 for (i = GAUDI2_IRQ_NUM_USER_FIRST, k = 0 ; k < prop->user_interrupt_count; i++, j++, k++)
3551 HL_USR_INTR_STRUCT_INIT(hdev->user_interrupt[j], hdev, i, HL_USR_INTERRUPT_CQ);
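/*
 * Note on indexing (derived from the two loops above): the decoder interrupts occupy
 * the first entries of hdev->user_interrupt[] and the user CQ interrupts continue from
 * the same j index, so both families share one contiguous array.
 */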
3552 }
3553
3554 static inline int gaudi2_get_non_zero_random_int(void)
3555 {
3556 int rand = get_random_u32();
3557
3558 return rand ? rand : 1;
3559 }
3560
3561 static void gaudi2_special_blocks_free(struct hl_device *hdev)
3562 {
3563 struct asic_fixed_properties *prop = &hdev->asic_prop;
3564 struct hl_skip_blocks_cfg *skip_special_blocks_cfg =
3565 &prop->skip_special_blocks_cfg;
3566
3567 kfree(prop->special_blocks);
3568 kfree(skip_special_blocks_cfg->block_types);
3569 kfree(skip_special_blocks_cfg->block_ranges);
3570 }
3571
3572 static void gaudi2_special_blocks_iterator_free(struct hl_device *hdev)
3573 {
3574 gaudi2_special_blocks_free(hdev);
3575 }
3576
3577 static bool gaudi2_special_block_skip(struct hl_device *hdev,
3578 struct hl_special_blocks_cfg *special_blocks_cfg,
3579 u32 blk_idx, u32 major, u32 minor, u32 sub_minor)
3580 {
3581 return false;
3582 }
3583
3584 static int gaudi2_special_blocks_config(struct hl_device *hdev)
3585 {
3586 struct asic_fixed_properties *prop = &hdev->asic_prop;
3587 int i, rc;
3588
3589 /* Configure Special blocks */
3590 prop->glbl_err_max_cause_num = GAUDI2_GLBL_ERR_MAX_CAUSE_NUM;
3591 prop->num_of_special_blocks = ARRAY_SIZE(gaudi2_special_blocks);
3592 prop->special_blocks = kmalloc_array(prop->num_of_special_blocks,
3593 sizeof(*prop->special_blocks), GFP_KERNEL);
3594 if (!prop->special_blocks)
3595 return -ENOMEM;
3596
3597 for (i = 0 ; i < prop->num_of_special_blocks ; i++)
3598 memcpy(&prop->special_blocks[i], &gaudi2_special_blocks[i],
3599 sizeof(*prop->special_blocks));
3600
3601 /* Configure when to skip Special blocks */
3602 memset(&prop->skip_special_blocks_cfg, 0, sizeof(prop->skip_special_blocks_cfg));
3603 prop->skip_special_blocks_cfg.skip_block_hook = gaudi2_special_block_skip;
3604
3605 if (ARRAY_SIZE(gaudi2_iterator_skip_block_types)) {
3606 prop->skip_special_blocks_cfg.block_types =
3607 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_types),
3608 sizeof(gaudi2_iterator_skip_block_types[0]), GFP_KERNEL);
3609 if (!prop->skip_special_blocks_cfg.block_types) {
3610 rc = -ENOMEM;
3611 goto free_special_blocks;
3612 }
3613
3614 memcpy(prop->skip_special_blocks_cfg.block_types, gaudi2_iterator_skip_block_types,
3615 sizeof(gaudi2_iterator_skip_block_types));
3616
3617 prop->skip_special_blocks_cfg.block_types_len =
3618 ARRAY_SIZE(gaudi2_iterator_skip_block_types);
3619 }
3620
3621 if (ARRAY_SIZE(gaudi2_iterator_skip_block_ranges)) {
3622 prop->skip_special_blocks_cfg.block_ranges =
3623 kmalloc_array(ARRAY_SIZE(gaudi2_iterator_skip_block_ranges),
3624 sizeof(gaudi2_iterator_skip_block_ranges[0]), GFP_KERNEL);
3625 if (!prop->skip_special_blocks_cfg.block_ranges) {
3626 rc = -ENOMEM;
3627 goto free_skip_special_blocks_types;
3628 }
3629
3630 for (i = 0 ; i < ARRAY_SIZE(gaudi2_iterator_skip_block_ranges) ; i++)
3631 memcpy(&prop->skip_special_blocks_cfg.block_ranges[i],
3632 &gaudi2_iterator_skip_block_ranges[i],
3633 sizeof(struct range));
3634
3635 prop->skip_special_blocks_cfg.block_ranges_len =
3636 ARRAY_SIZE(gaudi2_iterator_skip_block_ranges);
3637 }
3638
3639 return 0;
3640
3641 free_skip_special_blocks_types:
3642 kfree(prop->skip_special_blocks_cfg.block_types);
3643 free_special_blocks:
3644 kfree(prop->special_blocks);
3645
3646 return rc;
3647 }
3648
3649 static int gaudi2_special_blocks_iterator_config(struct hl_device *hdev)
3650 {
3651 return gaudi2_special_blocks_config(hdev);
3652 }
3653
3654 static void gaudi2_test_queues_msgs_free(struct hl_device *hdev)
3655 {
3656 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3657 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3658 int i;
3659
3660 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3661 /* bail-out if this is an allocation failure point */
3662 if (!msg_info[i].kern_addr)
3663 break;
3664
3665 hl_asic_dma_pool_free(hdev, msg_info[i].kern_addr, msg_info[i].dma_addr);
3666 msg_info[i].kern_addr = NULL;
3667 }
3668 }
3669
3670 static int gaudi2_test_queues_msgs_alloc(struct hl_device *hdev)
3671 {
3672 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3673 struct gaudi2_queues_test_info *msg_info = gaudi2->queues_test_info;
3674 int i, rc;
3675
3676 /* allocate a message-short buf for each Q we intend to test */
3677 for (i = 0 ; i < GAUDI2_NUM_TESTED_QS ; i++) {
3678 msg_info[i].kern_addr =
3679 (void *)hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_short),
3680 GFP_KERNEL, &msg_info[i].dma_addr);
3681 if (!msg_info[i].kern_addr) {
3682 dev_err(hdev->dev,
3683 "Failed to allocate dma memory for H/W queue %d testing\n", i);
3684 rc = -ENOMEM;
3685 goto err_exit;
3686 }
3687 }
3688
3689 return 0;
3690
3691 err_exit:
3692 gaudi2_test_queues_msgs_free(hdev);
3693 return rc;
3694 }
3695
3696 static int gaudi2_sw_init(struct hl_device *hdev)
3697 {
3698 struct asic_fixed_properties *prop = &hdev->asic_prop;
3699 struct gaudi2_device *gaudi2;
3700 int i, rc;
3701
3702 /* Allocate device structure */
3703 gaudi2 = kzalloc(sizeof(*gaudi2), GFP_KERNEL);
3704 if (!gaudi2)
3705 return -ENOMEM;
3706
3707 for (i = 0 ; i < ARRAY_SIZE(gaudi2_irq_map_table) ; i++) {
3708 if (gaudi2_irq_map_table[i].msg || !gaudi2_irq_map_table[i].valid)
3709 continue;
3710
3711 if (gaudi2->num_of_valid_hw_events == GAUDI2_EVENT_SIZE) {
3712 dev_err(hdev->dev, "H/W events array exceeds the limit of %u events\n",
3713 GAUDI2_EVENT_SIZE);
3714 rc = -EINVAL;
3715 goto free_gaudi2_device;
3716 }
3717
3718 gaudi2->hw_events[gaudi2->num_of_valid_hw_events++] = gaudi2_irq_map_table[i].fc_id;
3719 }
3720
3721 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++)
3722 gaudi2->lfsr_rand_seeds[i] = gaudi2_get_non_zero_random_int();
3723
3724 gaudi2->cpucp_info_get = gaudi2_cpucp_info_get;
3725
3726 hdev->asic_specific = gaudi2;
3727
3728 /* Create DMA pool for small allocations.
3729 * Use DEVICE_CACHE_LINE_SIZE for alignment since the NIC memory-mapped
3730 * PI/CI registers allocated from this pool have this restriction
3731 */
3732 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev,
3733 GAUDI2_DMA_POOL_BLK_SIZE, DEVICE_CACHE_LINE_SIZE, 0);
3734 if (!hdev->dma_pool) {
3735 dev_err(hdev->dev, "failed to create DMA pool\n");
3736 rc = -ENOMEM;
3737 goto free_gaudi2_device;
3738 }
3739
3740 rc = gaudi2_alloc_cpu_accessible_dma_mem(hdev);
3741 if (rc)
3742 goto free_dma_pool;
3743
3744 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
3745 if (!hdev->cpu_accessible_dma_pool) {
3746 dev_err(hdev->dev, "Failed to create CPU accessible DMA pool\n");
3747 rc = -ENOMEM;
3748 goto free_cpu_dma_mem;
3749 }
3750
3751 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, (uintptr_t) hdev->cpu_accessible_dma_mem,
3752 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
3753 if (rc) {
3754 dev_err(hdev->dev, "Failed to add memory to CPU accessible DMA pool\n");
3755 rc = -EFAULT;
3756 goto free_cpu_accessible_dma_pool;
3757 }
3758
3759 gaudi2->virt_msix_db_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, prop->pmmu.page_size,
3760 &gaudi2->virt_msix_db_dma_addr);
3761 if (!gaudi2->virt_msix_db_cpu_addr) {
3762 dev_err(hdev->dev, "Failed to allocate DMA memory for virtual MSI-X doorbell\n");
3763 rc = -ENOMEM;
3764 goto free_cpu_accessible_dma_pool;
3765 }
3766
3767 spin_lock_init(&gaudi2->hw_queues_lock);
3768
3769 gaudi2->scratchpad_bus_address = prop->mmu_pgt_addr + HMMU_PAGE_TABLES_SIZE + EDMA_PQS_SIZE;
3770
3771 gaudi2_user_mapped_blocks_init(hdev);
3772
3773 /* Initialize user interrupts */
3774 gaudi2_user_interrupt_setup(hdev);
3775
3776 hdev->supports_coresight = true;
3777 hdev->supports_sync_stream = true;
3778 hdev->supports_cb_mapping = true;
3779 hdev->supports_wait_for_multi_cs = false;
3780
3781 prop->supports_compute_reset = true;
3782
3783 /* Event queue sanity check added in FW version 1.11 */
3784 if (hl_fw_version_cmp(hdev, 1, 11, 0) < 0)
3785 hdev->event_queue.check_eqe_index = false;
3786 else
3787 hdev->event_queue.check_eqe_index = true;
3788
3789 hdev->asic_funcs->set_pci_memory_regions(hdev);
3790
3791 rc = gaudi2_special_blocks_iterator_config(hdev);
3792 if (rc)
3793 goto free_virt_msix_db_mem;
3794
3795 rc = gaudi2_test_queues_msgs_alloc(hdev);
3796 if (rc)
3797 goto special_blocks_free;
3798
3799 hdev->heartbeat_debug_info.cpu_queue_id = GAUDI2_QUEUE_ID_CPU_PQ;
3800
3801 return 0;
3802
3803 special_blocks_free:
3804 gaudi2_special_blocks_iterator_free(hdev);
3805 free_virt_msix_db_mem:
3806 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3807 free_cpu_accessible_dma_pool:
3808 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3809 free_cpu_dma_mem:
3810 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3811 hdev->cpu_accessible_dma_address);
3812 free_dma_pool:
3813 dma_pool_destroy(hdev->dma_pool);
3814 free_gaudi2_device:
3815 kfree(gaudi2);
3816 return rc;
3817 }
3818
3819 static int gaudi2_sw_fini(struct hl_device *hdev)
3820 {
3821 struct asic_fixed_properties *prop = &hdev->asic_prop;
3822 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3823
3824 gaudi2_test_queues_msgs_free(hdev);
3825
3826 gaudi2_special_blocks_iterator_free(hdev);
3827
3828 hl_cpu_accessible_dma_pool_free(hdev, prop->pmmu.page_size, gaudi2->virt_msix_db_cpu_addr);
3829
3830 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
3831
3832 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem,
3833 hdev->cpu_accessible_dma_address);
3834
3835 dma_pool_destroy(hdev->dma_pool);
3836
3837 kfree(gaudi2);
3838
3839 return 0;
3840 }
3841
3842 static void gaudi2_stop_qman_common(struct hl_device *hdev, u32 reg_base)
3843 {
3844 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_STOP |
3845 QM_GLBL_CFG1_CQF_STOP |
3846 QM_GLBL_CFG1_CP_STOP);
3847
3848 /* stop also the ARC */
3849 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_STOP);
3850 }
3851
3852 static void gaudi2_flush_qman_common(struct hl_device *hdev, u32 reg_base)
3853 {
3854 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, QM_GLBL_CFG1_PQF_FLUSH |
3855 QM_GLBL_CFG1_CQF_FLUSH |
3856 QM_GLBL_CFG1_CP_FLUSH);
3857 }
3858
3859 static void gaudi2_flush_qman_arc_common(struct hl_device *hdev, u32 reg_base)
3860 {
3861 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, QM_GLBL_CFG2_ARC_CQF_FLUSH);
3862 }
3863
3864 /**
3865 * gaudi2_clear_qm_fence_counters_common - clear QM's fence counters
3866 *
3867 * @hdev: pointer to the habanalabs device structure
3868 * @queue_id: queue whose fence counters are cleared
3869 * @skip_fence: if true, set the maximum fence value to all fence counters to avoid
3870 * getting stuck on any fence value. otherwise set all fence
3871 * counters to 0 (standard clear of fence counters)
3872 */
3873 static void gaudi2_clear_qm_fence_counters_common(struct hl_device *hdev, u32 queue_id,
3874 bool skip_fence)
3875 {
3876 u32 size, reg_base;
3877 u32 addr, val;
3878
3879 reg_base = gaudi2_qm_blocks_bases[queue_id];
3880
3881 addr = reg_base + QM_CP_FENCE0_CNT_0_OFFSET;
3882 size = mmPDMA0_QM_CP_BARRIER_CFG - mmPDMA0_QM_CP_FENCE0_CNT_0;
3883
3884 /*
3885 * in case we want to make sure that a QM that is stuck on a fence will
3886 * be released, we should set the fence counter to a value higher than
3887 * the value the QM is waiting for. to comply with a fence counter of
3888 * any value, we set the maximum fence value to all counters
3889 */
3890 val = skip_fence ? U32_MAX : 0;
3891 gaudi2_memset_device_lbw(hdev, addr, size, val);
3892 }
3893
3894 static void gaudi2_qman_manual_flush_common(struct hl_device *hdev, u32 queue_id)
3895 {
3896 u32 reg_base = gaudi2_qm_blocks_bases[queue_id];
3897
3898 gaudi2_clear_qm_fence_counters_common(hdev, queue_id, true);
3899 gaudi2_flush_qman_common(hdev, reg_base);
3900 gaudi2_flush_qman_arc_common(hdev, reg_base);
3901 }
3902
3903 static void gaudi2_stop_dma_qmans(struct hl_device *hdev)
3904 {
3905 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3906 int dcore, inst;
3907
3908 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
3909 goto stop_edma_qmans;
3910
3911 /* Stop CPs of PDMA QMANs */
3912 gaudi2_stop_qman_common(hdev, mmPDMA0_QM_BASE);
3913 gaudi2_stop_qman_common(hdev, mmPDMA1_QM_BASE);
3914
3915 stop_edma_qmans:
3916 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
3917 return;
3918
3919 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
3920 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
3921 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
3922 u32 qm_base;
3923
3924 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
3925 continue;
3926
3927 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
3928 inst * DCORE_EDMA_OFFSET;
3929
3930 /* Stop CPs of EDMA QMANs */
3931 gaudi2_stop_qman_common(hdev, qm_base);
3932 }
3933 }
3934 }
3935
3936 static void gaudi2_stop_mme_qmans(struct hl_device *hdev)
3937 {
3938 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3939 u32 offset, i;
3940
3941 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
3942
3943 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
3944 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i)))
3945 continue;
3946
3947 gaudi2_stop_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
3948 }
3949 }
3950
static void gaudi2_stop_tpc_qmans(struct hl_device *hdev)
3952 {
3953 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3954 u32 reg_base;
3955 int i;
3956
3957 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
3958 return;
3959
3960 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
3961 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
3962 continue;
3963
3964 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
3965 gaudi2_stop_qman_common(hdev, reg_base);
3966 }
3967 }
3968
static void gaudi2_stop_rot_qmans(struct hl_device *hdev)
3970 {
3971 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3972 u32 reg_base;
3973 int i;
3974
3975 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
3976 return;
3977
3978 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
3979 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
3980 continue;
3981
3982 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
3983 gaudi2_stop_qman_common(hdev, reg_base);
3984 }
3985 }
3986
static void gaudi2_stop_nic_qmans(struct hl_device *hdev)
3988 {
3989 struct gaudi2_device *gaudi2 = hdev->asic_specific;
3990 u32 reg_base, queue_id;
3991 int i;
3992
3993 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
3994 return;
3995
3996 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
3997
3998 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
3999 if (!(hdev->nic_ports_mask & BIT(i)))
4000 continue;
4001
4002 reg_base = gaudi2_qm_blocks_bases[queue_id];
4003 gaudi2_stop_qman_common(hdev, reg_base);
4004 }
4005 }
4006
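/* Stall a DMA core by setting the HALT bit in its CFG_1 register */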
static void gaudi2_stall_dma_common(struct hl_device *hdev, u32 reg_base)
4008 {
4009 u32 reg_val;
4010
4011 reg_val = FIELD_PREP(PDMA0_CORE_CFG_1_HALT_MASK, 0x1);
4012 WREG32(reg_base + DMA_CORE_CFG_1_OFFSET, reg_val);
4013 }
4014
static void gaudi2_dma_stall(struct hl_device *hdev)
4016 {
4017 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4018 int dcore, inst;
4019
4020 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4021 goto stall_edma;
4022
4023 gaudi2_stall_dma_common(hdev, mmPDMA0_CORE_BASE);
4024 gaudi2_stall_dma_common(hdev, mmPDMA1_CORE_BASE);
4025
4026 stall_edma:
4027 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4028 return;
4029
4030 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4031 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4032 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4033 u32 core_base;
4034
4035 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4036 continue;
4037
4038 core_base = mmDCORE0_EDMA0_CORE_BASE + dcore * DCORE_OFFSET +
4039 inst * DCORE_EDMA_OFFSET;
4040
4041 /* Stall CPs of EDMA QMANs */
4042 gaudi2_stall_dma_common(hdev, core_base);
4043 }
4044 }
4045 }
4046
static void gaudi2_mme_stall(struct hl_device *hdev)
4048 {
4049 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4050 u32 offset, i;
4051
4052 offset = mmDCORE1_MME_CTRL_LO_QM_STALL - mmDCORE0_MME_CTRL_LO_QM_STALL;
4053
4054 for (i = 0 ; i < NUM_OF_DCORES ; i++)
4055 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4056 WREG32(mmDCORE0_MME_CTRL_LO_QM_STALL + (i * offset), 1);
4057 }
4058
static void gaudi2_tpc_stall(struct hl_device *hdev)
4060 {
4061 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4062 u32 reg_base;
4063 int i;
4064
4065 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4066 return;
4067
4068 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4069 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4070 continue;
4071
4072 reg_base = gaudi2_tpc_cfg_blocks_bases[i];
4073 WREG32(reg_base + TPC_CFG_STALL_OFFSET, 1);
4074 }
4075 }
4076
static void gaudi2_rotator_stall(struct hl_device *hdev)
4078 {
4079 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4080 u32 reg_val;
4081 int i;
4082
4083 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4084 return;
4085
4086 reg_val = FIELD_PREP(ROT_MSS_HALT_WBC_MASK, 0x1) |
4087 FIELD_PREP(ROT_MSS_HALT_RSB_MASK, 0x1) |
4088 FIELD_PREP(ROT_MSS_HALT_MRSB_MASK, 0x1);
4089
4090 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4091 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4092 continue;
4093
4094 WREG32(mmROT0_MSS_HALT + i * ROT_OFFSET, reg_val);
4095 }
4096 }
4097
static void gaudi2_disable_qman_common(struct hl_device *hdev, u32 reg_base)
4099 {
4100 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, 0);
4101 }
4102
static void gaudi2_disable_dma_qmans(struct hl_device *hdev)
4104 {
4105 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4106 int dcore, inst;
4107
4108 if (!(gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK))
4109 goto stop_edma_qmans;
4110
4111 gaudi2_disable_qman_common(hdev, mmPDMA0_QM_BASE);
4112 gaudi2_disable_qman_common(hdev, mmPDMA1_QM_BASE);
4113
4114 stop_edma_qmans:
4115 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4116 return;
4117
4118 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
4119 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
4120 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
4121 u32 qm_base;
4122
4123 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + seq)))
4124 continue;
4125
4126 qm_base = mmDCORE0_EDMA0_QM_BASE + dcore * DCORE_OFFSET +
4127 inst * DCORE_EDMA_OFFSET;
4128
4129 /* Disable CPs of EDMA QMANs */
4130 gaudi2_disable_qman_common(hdev, qm_base);
4131 }
4132 }
4133 }
4134
static void gaudi2_disable_mme_qmans(struct hl_device *hdev)
4136 {
4137 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4138 u32 offset, i;
4139
4140 offset = mmDCORE1_MME_QM_BASE - mmDCORE0_MME_QM_BASE;
4141
4142 for (i = 0 ; i < NUM_OF_DCORES ; i++)
4143 if (gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + i))
4144 gaudi2_disable_qman_common(hdev, mmDCORE0_MME_QM_BASE + (i * offset));
4145 }
4146
static void gaudi2_disable_tpc_qmans(struct hl_device *hdev)
4148 {
4149 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4150 u32 reg_base;
4151 int i;
4152
4153 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4154 return;
4155
4156 for (i = 0 ; i < TPC_ID_SIZE ; i++) {
4157 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + i)))
4158 continue;
4159
4160 reg_base = gaudi2_qm_blocks_bases[gaudi2_tpc_id_to_queue_id[i]];
4161 gaudi2_disable_qman_common(hdev, reg_base);
4162 }
4163 }
4164
static void gaudi2_disable_rot_qmans(struct hl_device *hdev)
4166 {
4167 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4168 u32 reg_base;
4169 int i;
4170
4171 if (!(gaudi2->hw_cap_initialized & HW_CAP_ROT_MASK))
4172 return;
4173
4174 for (i = 0 ; i < ROTATOR_ID_SIZE ; i++) {
4175 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_ROT_SHIFT + i)))
4176 continue;
4177
4178 reg_base = gaudi2_qm_blocks_bases[gaudi2_rot_id_to_queue_id[i]];
4179 gaudi2_disable_qman_common(hdev, reg_base);
4180 }
4181 }
4182
static void gaudi2_disable_nic_qmans(struct hl_device *hdev)
4184 {
4185 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4186 u32 reg_base, queue_id;
4187 int i;
4188
4189 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4190 return;
4191
4192 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4193
4194 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4195 if (!(hdev->nic_ports_mask & BIT(i)))
4196 continue;
4197
4198 reg_base = gaudi2_qm_blocks_bases[queue_id];
4199 gaudi2_disable_qman_common(hdev, reg_base);
4200 }
4201 }
4202
static void gaudi2_enable_timestamp(struct hl_device *hdev)
4204 {
4205 /* Disable the timestamp counter */
4206 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4207
4208 /* Zero the lower/upper parts of the 64-bit counter */
4209 WREG32(mmPSOC_TIMESTAMP_BASE + 0xC, 0);
4210 WREG32(mmPSOC_TIMESTAMP_BASE + 0x8, 0);
4211
4212 /* Enable the counter */
4213 WREG32(mmPSOC_TIMESTAMP_BASE, 1);
4214 }
4215
static void gaudi2_disable_timestamp(struct hl_device *hdev)
4217 {
4218 /* Disable the timestamp counter */
4219 WREG32(mmPSOC_TIMESTAMP_BASE, 0);
4220 }
4221
static const char *gaudi2_irq_name(u16 irq_number)
4223 {
4224 switch (irq_number) {
4225 case GAUDI2_IRQ_NUM_EVENT_QUEUE:
4226 return "gaudi2 cpu eq";
4227 case GAUDI2_IRQ_NUM_COMPLETION:
4228 return "gaudi2 completion";
4229 case GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ... GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM:
4230 return gaudi2_vdec_irq_name[irq_number - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM];
4231 case GAUDI2_IRQ_NUM_TPC_ASSERT:
4232 return "gaudi2 tpc assert";
4233 case GAUDI2_IRQ_NUM_UNEXPECTED_ERROR:
4234 return "gaudi2 unexpected error";
4235 case GAUDI2_IRQ_NUM_USER_FIRST ... GAUDI2_IRQ_NUM_USER_LAST:
4236 return "gaudi2 user completion";
4237 case GAUDI2_IRQ_NUM_EQ_ERROR:
4238 return "gaudi2 eq error";
4239 default:
4240 return "invalid";
4241 }
4242 }
4243
static void gaudi2_dec_disable_msix(struct hl_device *hdev, u32 max_irq_num)
4245 {
4246 int i, irq, relative_idx;
4247 struct hl_dec *dec;
4248
4249 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i < max_irq_num ; i++) {
4250 irq = pci_irq_vector(hdev->pdev, i);
4251 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4252
4253 dec = hdev->dec + relative_idx / 2;
4254
4255 /* We pass different structures depending on the irq handler. For the abnormal
4256 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4257 * user_interrupt entry
4258 */
4259 free_irq(irq, ((relative_idx % 2) ?
4260 (void *) dec :
4261 (void *) &hdev->user_interrupt[dec->core_id]));
4262 }
4263 }
4264
static int gaudi2_dec_enable_msix(struct hl_device *hdev)
4266 {
4267 int rc, i, irq_init_cnt, irq, relative_idx;
4268 struct hl_dec *dec;
4269
4270 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM, irq_init_cnt = 0;
4271 i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM;
4272 i++, irq_init_cnt++) {
4273
4274 irq = pci_irq_vector(hdev->pdev, i);
4275 relative_idx = i - GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM;
4276
4277 /* We pass different structures depending on the irq handler. For the abnormal
4278 * interrupt we pass hl_dec and for the regular interrupt we pass the relevant
4279 * user_interrupt entry
4280 *
4281 * TODO: change the dec abnrm to threaded irq
4282 */
4283
4284 dec = hdev->dec + relative_idx / 2;
4285 if (relative_idx % 2) {
4286 rc = request_irq(irq, hl_irq_handler_dec_abnrm, 0,
4287 gaudi2_irq_name(i), (void *) dec);
4288 } else {
4289 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4290 (void *) &hdev->user_interrupt[dec->core_id]);
4291 }
4292
4293 if (rc) {
4294 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4295 goto free_dec_irqs;
4296 }
4297 }
4298
4299 return 0;
4300
4301 free_dec_irqs:
4302 gaudi2_dec_disable_msix(hdev, (GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + irq_init_cnt));
4303 return rc;
4304 }
4305
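/*
 * MSI-X vectors are requested in the following order: the reserved CS
 * completion queue, the CPU event queue, the decoder normal/abnormal pairs,
 * TPC assert, unexpected error, the user interrupts and finally the EQ error
 * interrupt. The error path below unwinds in reverse order of registration.
 */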
static int gaudi2_enable_msix(struct hl_device *hdev)
4307 {
4308 struct asic_fixed_properties *prop = &hdev->asic_prop;
4309 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4310 int rc, irq, i, j, user_irq_init_cnt;
4311 struct hl_cq *cq;
4312
4313 if (gaudi2->hw_cap_initialized & HW_CAP_MSIX)
4314 return 0;
4315
4316 hl_init_cpu_for_irq(hdev);
4317
4318 rc = pci_alloc_irq_vectors(hdev->pdev, GAUDI2_MSIX_ENTRIES, GAUDI2_MSIX_ENTRIES,
4319 PCI_IRQ_MSIX);
4320 if (rc < 0) {
4321 dev_err(hdev->dev, "MSI-X: Failed to enable support -- %d/%d\n",
4322 GAUDI2_MSIX_ENTRIES, rc);
4323 return rc;
4324 }
4325
4326 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4327 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4328 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_COMPLETION), cq);
4329 if (rc) {
4330 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4331 goto free_irq_vectors;
4332 }
4333
4334 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4335 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi2_irq_name(GAUDI2_IRQ_NUM_EVENT_QUEUE),
4336 &hdev->event_queue);
4337 if (rc) {
4338 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4339 goto free_completion_irq;
4340 }
4341
4342 rc = gaudi2_dec_enable_msix(hdev);
4343 if (rc) {
4344 dev_err(hdev->dev, "Failed to enable decoder IRQ");
4345 goto free_event_irq;
4346 }
4347
4348 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4349 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4350 gaudi2_irq_name(GAUDI2_IRQ_NUM_TPC_ASSERT),
4351 &hdev->tpc_interrupt);
4352 if (rc) {
4353 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4354 goto free_dec_irq;
4355 }
4356
4357 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4358 rc = request_threaded_irq(irq, NULL, hl_irq_user_interrupt_thread_handler, IRQF_ONESHOT,
4359 gaudi2_irq_name(GAUDI2_IRQ_NUM_UNEXPECTED_ERROR),
4360 &hdev->unexpected_error_interrupt);
4361 if (rc) {
4362 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4363 goto free_tpc_irq;
4364 }
4365
4366 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0;
4367 user_irq_init_cnt < prop->user_interrupt_count;
4368 i++, j++, user_irq_init_cnt++) {
4369
4370 irq = pci_irq_vector(hdev->pdev, i);
4371 hl_set_irq_affinity(hdev, irq);
4372 rc = request_irq(irq, hl_irq_user_interrupt_handler, 0, gaudi2_irq_name(i),
4373 &hdev->user_interrupt[j]);
4374 if (rc) {
4375 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4376 goto free_user_irq;
4377 }
4378 }
4379
4380 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4381 rc = request_threaded_irq(irq, NULL, hl_irq_eq_error_interrupt_thread_handler,
4382 IRQF_ONESHOT, gaudi2_irq_name(GAUDI2_IRQ_NUM_EQ_ERROR),
4383 hdev);
4384 if (rc) {
4385 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
4386 goto free_user_irq;
4387 }
4388
4389 gaudi2->hw_cap_initialized |= HW_CAP_MSIX;
4390
4391 return 0;
4392
4393 free_user_irq:
4394 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count;
4395 i < GAUDI2_IRQ_NUM_USER_FIRST + user_irq_init_cnt ; i++, j++) {
4396
4397 irq = pci_irq_vector(hdev->pdev, i);
4398 irq_set_affinity_and_hint(irq, NULL);
4399 free_irq(irq, &hdev->user_interrupt[j]);
4400 }
4401 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4402 free_irq(irq, &hdev->unexpected_error_interrupt);
4403 free_tpc_irq:
4404 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4405 free_irq(irq, &hdev->tpc_interrupt);
4406 free_dec_irq:
4407 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_DEC_LAST + 1);
free_event_irq:
	irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
	free_irq(irq, &hdev->event_queue);
4411
4412 free_completion_irq:
4413 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4414 free_irq(irq, cq);
4415
4416 free_irq_vectors:
4417 pci_free_irq_vectors(hdev->pdev);
4418
4419 return rc;
4420 }
4421
static void gaudi2_sync_irqs(struct hl_device *hdev)
4423 {
4424 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4425 int i, j;
4426 int irq;
4427
4428 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4429 return;
4430
	/* Wait for all pending IRQ handlers to finish */
4432 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION));
4433
4434 for (i = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM ; i <= GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM ; i++) {
4435 irq = pci_irq_vector(hdev->pdev, i);
4436 synchronize_irq(irq);
4437 }
4438
4439 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT));
4440 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR));
4441
4442 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = 0 ; j < hdev->asic_prop.user_interrupt_count;
4443 i++, j++) {
4444 irq = pci_irq_vector(hdev->pdev, i);
4445 synchronize_irq(irq);
4446 }
4447
4448 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE));
4449 synchronize_irq(pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR));
4450 }
4451
static void gaudi2_disable_msix(struct hl_device *hdev)
4453 {
4454 struct asic_fixed_properties *prop = &hdev->asic_prop;
4455 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4456 struct hl_cq *cq;
4457 int irq, i, j, k;
4458
4459 if (!(gaudi2->hw_cap_initialized & HW_CAP_MSIX))
4460 return;
4461
4462 gaudi2_sync_irqs(hdev);
4463
4464 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE);
4465 free_irq(irq, &hdev->event_queue);
4466
4467 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1);
4468
4469 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_TPC_ASSERT);
4470 free_irq(irq, &hdev->tpc_interrupt);
4471
4472 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR);
4473 free_irq(irq, &hdev->unexpected_error_interrupt);
4474
4475 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, k = 0;
4476 k < hdev->asic_prop.user_interrupt_count ; i++, j++, k++) {
4477
4478 irq = pci_irq_vector(hdev->pdev, i);
4479 irq_set_affinity_and_hint(irq, NULL);
4480 free_irq(irq, &hdev->user_interrupt[j]);
4481 }
4482
4483 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION);
4484 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_CS_COMPLETION];
4485 free_irq(irq, cq);
4486
4487 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EQ_ERROR);
4488 free_irq(irq, hdev);
4489
4490 pci_free_irq_vectors(hdev->pdev);
4491
4492 gaudi2->hw_cap_initialized &= ~HW_CAP_MSIX;
4493 }
4494
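/*
 * Gracefully stop the decoders of a single dcore: request a graceful stop via
 * the BRDG_CTRL register and poll until the decoder reports that all of its
 * outstanding traffic has drained.
 */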
static void gaudi2_stop_dcore_dec(struct hl_device *hdev, int dcore_id)
4496 {
4497 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4498 u32 graceful_pend_mask = DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4499 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4500 int rc;
4501
4502 if (hdev->pldm)
4503 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4504 else
4505 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4506
4507 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4508 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
4509 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4510 continue;
4511
4512 offset = dcore_id * DCORE_OFFSET + dec_id * DCORE_VDEC_OFFSET;
4513
4514 WREG32(mmDCORE0_DEC0_CMD_SWREG16 + offset, 0);
4515
4516 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4517
		/* Wait until all traffic from the decoder stops
		 * before applying core reset.
		 */
4521 rc = hl_poll_timeout(
4522 hdev,
4523 mmDCORE0_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4524 graceful,
4525 (graceful & graceful_pend_mask),
4526 100,
4527 timeout_usec);
4528 if (rc)
4529 dev_err(hdev->dev,
4530 "Failed to stop traffic from DCORE%d Decoder %d\n",
4531 dcore_id, dec_id);
4532 }
4533 }
4534
static void gaudi2_stop_pcie_dec(struct hl_device *hdev)
4536 {
4537 u32 reg_val = FIELD_PREP(DCORE0_VDEC0_BRDG_CTRL_GRACEFUL_STOP_MASK, 0x1);
4538 u32 graceful_pend_mask = PCIE_VDEC0_BRDG_CTRL_GRACEFUL_PEND_MASK;
4539 u32 timeout_usec, dec_id, dec_bit, offset, graceful;
4540 int rc;
4541
4542 if (hdev->pldm)
4543 timeout_usec = GAUDI2_PLDM_VDEC_TIMEOUT_USEC;
4544 else
4545 timeout_usec = GAUDI2_VDEC_TIMEOUT_USEC;
4546
4547 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
4548 dec_bit = PCIE_DEC_SHIFT + dec_id;
4549 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
4550 continue;
4551
4552 offset = dec_id * PCIE_VDEC_OFFSET;
4553
4554 WREG32(mmPCIE_DEC0_CMD_SWREG16 + offset, 0);
4555
4556 WREG32(mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset, reg_val);
4557
		/* Wait until all traffic from the decoder stops
		 * before applying core reset.
		 */
4561 rc = hl_poll_timeout(
4562 hdev,
4563 mmPCIE_VDEC0_BRDG_CTRL_GRACEFUL + offset,
4564 graceful,
4565 (graceful & graceful_pend_mask),
4566 100,
4567 timeout_usec);
4568 if (rc)
4569 dev_err(hdev->dev,
4570 "Failed to stop traffic from PCIe Decoder %d\n",
4571 dec_id);
4572 }
4573 }
4574
static void gaudi2_stop_dec(struct hl_device *hdev)
4576 {
4577 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4578 int dcore_id;
4579
4580 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == 0)
4581 return;
4582
4583 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
4584 gaudi2_stop_dcore_dec(hdev, dcore_id);
4585
4586 gaudi2_stop_pcie_dec(hdev);
4587 }
4588
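/* Request RUN or HALT for an ARC core via its AUX RUN/HALT request register */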
static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4590 {
4591 u32 reg_base, reg_val;
4592
4593 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4594 if (run_mode == HL_ENGINE_CORE_RUN)
4595 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1);
4596 else
4597 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1);
4598
4599 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val);
4600 }
4601
static void gaudi2_halt_arcs(struct hl_device *hdev)
4603 {
4604 u16 arc_id;
4605
4606 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) {
4607 if (gaudi2_is_arc_enabled(hdev, arc_id))
4608 gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT);
4609 }
4610 }
4611
static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode)
4613 {
4614 int rc;
4615 u32 reg_base, val, ack_mask, timeout_usec = 100000;
4616
4617 if (hdev->pldm)
4618 timeout_usec *= 100;
4619
4620 reg_base = gaudi2_arc_blocks_bases[cpu_id];
4621 if (run_mode == HL_ENGINE_CORE_RUN)
4622 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK;
4623 else
4624 ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK;
4625
4626 rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET,
4627 val, ((val & ack_mask) == ack_mask),
4628 1000, timeout_usec);
4629
4630 if (!rc) {
		/* Clear the RUN/HALT request */
4632 val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0);
4633 WREG32(reg_base + ARC_HALT_REQ_OFFSET, val);
4634 }
4635
4636 return rc;
4637 }
4638
static void gaudi2_reset_arcs(struct hl_device *hdev)
4640 {
4641 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4642 u16 arc_id;
4643
4644 if (!gaudi2)
4645 return;
4646
4647 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++)
4648 if (gaudi2_is_arc_enabled(hdev, arc_id))
4649 gaudi2_clr_arc_id_cap(hdev, arc_id);
4650 }
4651
static void gaudi2_nic_qmans_manual_flush(struct hl_device *hdev)
4653 {
4654 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4655 u32 queue_id;
4656 int i;
4657
4658 if (!(gaudi2->nic_hw_cap_initialized & HW_CAP_NIC_MASK))
4659 return;
4660
4661 queue_id = GAUDI2_QUEUE_ID_NIC_0_0;
4662
4663 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
4664 if (!(hdev->nic_ports_mask & BIT(i)))
4665 continue;
4666
4667 gaudi2_qman_manual_flush_common(hdev, queue_id);
4668 }
4669 }
4670
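/*
 * Two-pass flow: first issue the RUN/HALT request to all requested cores,
 * then verify that each core acknowledged the requested mode.
 */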
static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids,
					u32 num_cores, u32 core_command)
4673 {
4674 int i, rc;
4675
4676 for (i = 0 ; i < num_cores ; i++) {
4677 if (gaudi2_is_arc_enabled(hdev, core_ids[i]))
4678 gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command);
4679 }
4680
4681 for (i = 0 ; i < num_cores ; i++) {
4682 if (gaudi2_is_arc_enabled(hdev, core_ids[i])) {
4683 rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command);
4684
4685 if (rc) {
4686 dev_err(hdev->dev, "failed to %s arc: %d\n",
4687 (core_command == HL_ENGINE_CORE_HALT) ?
4688 "HALT" : "RUN", core_ids[i]);
4689 return -1;
4690 }
4691 }
4692 }
4693
4694 return 0;
4695 }
4696
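/*
 * Stall/resume a single TPC by toggling its CFG stall bit. On resume, the
 * DBG_EXIT bit in the EML debug counter register is also set.
 */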
static int gaudi2_set_tpc_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4698 {
4699 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4700 u32 reg_base, reg_addr, reg_val, tpc_id;
4701
4702 if (!(gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK))
4703 return 0;
4704
4705 tpc_id = gaudi2_tpc_engine_id_to_tpc_id[engine_id];
4706 if (!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(HW_CAP_TPC_SHIFT + tpc_id)))
4707 return 0;
4708
4709 reg_base = gaudi2_tpc_cfg_blocks_bases[tpc_id];
4710 reg_addr = reg_base + TPC_CFG_STALL_OFFSET;
4711 reg_val = FIELD_PREP(DCORE0_TPC0_CFG_TPC_STALL_V_MASK,
4712 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4713 WREG32(reg_addr, reg_val);
4714
4715 if (engine_command == HL_ENGINE_RESUME) {
4716 reg_base = gaudi2_tpc_eml_cfg_blocks_bases[tpc_id];
4717 reg_addr = reg_base + TPC_EML_CFG_DBG_CNT_OFFSET;
4718 RMWREG32(reg_addr, 0x1, DCORE0_TPC0_EML_CFG_DBG_CNT_DBG_EXIT_MASK);
4719 }
4720
4721 return 0;
4722 }
4723
static int gaudi2_set_mme_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4725 {
4726 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4727 u32 reg_base, reg_addr, reg_val, mme_id;
4728
4729 mme_id = gaudi2_mme_engine_id_to_mme_id[engine_id];
4730 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_MME_SHIFT + mme_id)))
4731 return 0;
4732
4733 reg_base = gaudi2_mme_ctrl_lo_blocks_bases[mme_id];
4734 reg_addr = reg_base + MME_CTRL_LO_QM_STALL_OFFSET;
4735 reg_val = FIELD_PREP(DCORE0_MME_CTRL_LO_QM_STALL_V_MASK,
4736 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4737 WREG32(reg_addr, reg_val);
4738
4739 return 0;
4740 }
4741
static int gaudi2_set_edma_engine_mode(struct hl_device *hdev, u32 engine_id, u32 engine_command)
4743 {
4744 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4745 u32 reg_base, reg_addr, reg_val, edma_id;
4746
4747 if (!(gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK))
4748 return 0;
4749
4750 edma_id = gaudi2_edma_engine_id_to_edma_id[engine_id];
4751 if (!(gaudi2->hw_cap_initialized & BIT_ULL(HW_CAP_EDMA_SHIFT + edma_id)))
4752 return 0;
4753
4754 reg_base = gaudi2_dma_core_blocks_bases[edma_id];
4755 reg_addr = reg_base + EDMA_CORE_CFG_STALL_OFFSET;
4756 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK,
4757 (engine_command == HL_ENGINE_STALL) ? 1 : 0);
4758 WREG32(reg_addr, reg_val);
4759
4760 if (engine_command == HL_ENGINE_STALL) {
4761 reg_val = FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_HALT_MASK, 0x1) |
4762 FIELD_PREP(DCORE0_EDMA0_CORE_CFG_1_FLUSH_MASK, 0x1);
4763 WREG32(reg_addr, reg_val);
4764 }
4765
4766 return 0;
4767 }
4768
static int gaudi2_set_engine_modes(struct hl_device *hdev,
					u32 *engine_ids, u32 num_engines, u32 engine_command)
4771 {
4772 int i, rc;
4773
4774 for (i = 0 ; i < num_engines ; ++i) {
4775 switch (engine_ids[i]) {
4776 case GAUDI2_DCORE0_ENGINE_ID_TPC_0 ... GAUDI2_DCORE0_ENGINE_ID_TPC_5:
4777 case GAUDI2_DCORE1_ENGINE_ID_TPC_0 ... GAUDI2_DCORE1_ENGINE_ID_TPC_5:
4778 case GAUDI2_DCORE2_ENGINE_ID_TPC_0 ... GAUDI2_DCORE2_ENGINE_ID_TPC_5:
4779 case GAUDI2_DCORE3_ENGINE_ID_TPC_0 ... GAUDI2_DCORE3_ENGINE_ID_TPC_5:
4780 rc = gaudi2_set_tpc_engine_mode(hdev, engine_ids[i], engine_command);
4781 if (rc)
4782 return rc;
4783
4784 break;
4785 case GAUDI2_DCORE0_ENGINE_ID_MME:
4786 case GAUDI2_DCORE1_ENGINE_ID_MME:
4787 case GAUDI2_DCORE2_ENGINE_ID_MME:
4788 case GAUDI2_DCORE3_ENGINE_ID_MME:
4789 rc = gaudi2_set_mme_engine_mode(hdev, engine_ids[i], engine_command);
4790 if (rc)
4791 return rc;
4792
4793 break;
4794 case GAUDI2_DCORE0_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE0_ENGINE_ID_EDMA_1:
4795 case GAUDI2_DCORE1_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE1_ENGINE_ID_EDMA_1:
4796 case GAUDI2_DCORE2_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE2_ENGINE_ID_EDMA_1:
4797 case GAUDI2_DCORE3_ENGINE_ID_EDMA_0 ... GAUDI2_DCORE3_ENGINE_ID_EDMA_1:
4798 rc = gaudi2_set_edma_engine_mode(hdev, engine_ids[i], engine_command);
4799 if (rc)
4800 return rc;
4801
4802 break;
4803 default:
4804 dev_err(hdev->dev, "Invalid engine ID %u\n", engine_ids[i]);
4805 return -EINVAL;
4806 }
4807 }
4808
4809 return 0;
4810 }
4811
static int gaudi2_set_engines(struct hl_device *hdev, u32 *engine_ids,
					u32 num_engines, u32 engine_command)
4814 {
4815 switch (engine_command) {
4816 case HL_ENGINE_CORE_HALT:
4817 case HL_ENGINE_CORE_RUN:
4818 return gaudi2_set_engine_cores(hdev, engine_ids, num_engines, engine_command);
4819
4820 case HL_ENGINE_STALL:
4821 case HL_ENGINE_RESUME:
4822 return gaudi2_set_engine_modes(hdev, engine_ids, num_engines, engine_command);
4823
4824 default:
4825 dev_err(hdev->dev, "failed to execute command id %u\n", engine_command);
4826 return -EINVAL;
4827 }
4828 }
4829
static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
4831 {
4832 u32 wait_timeout_ms;
4833
4834 if (hdev->pldm)
4835 wait_timeout_ms = GAUDI2_PLDM_RESET_WAIT_MSEC;
4836 else
4837 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC;
4838
4839 if (fw_reset)
4840 goto skip_engines;
4841
4842 gaudi2_stop_dma_qmans(hdev);
4843 gaudi2_stop_mme_qmans(hdev);
4844 gaudi2_stop_tpc_qmans(hdev);
4845 gaudi2_stop_rot_qmans(hdev);
4846 gaudi2_stop_nic_qmans(hdev);
4847 msleep(wait_timeout_ms);
4848
4849 gaudi2_halt_arcs(hdev);
4850 gaudi2_dma_stall(hdev);
4851 gaudi2_mme_stall(hdev);
4852 gaudi2_tpc_stall(hdev);
4853 gaudi2_rotator_stall(hdev);
4854
4855 msleep(wait_timeout_ms);
4856
4857 gaudi2_stop_dec(hdev);
4858
	/*
	 * In case of soft reset do a manual flush of the QMANs (currently
	 * done only for the NIC QMANs).
	 */
4863 if (!hard_reset)
4864 gaudi2_nic_qmans_manual_flush(hdev);
4865
4866 gaudi2_disable_dma_qmans(hdev);
4867 gaudi2_disable_mme_qmans(hdev);
4868 gaudi2_disable_tpc_qmans(hdev);
4869 gaudi2_disable_rot_qmans(hdev);
4870 gaudi2_disable_nic_qmans(hdev);
4871 gaudi2_disable_timestamp(hdev);
4872
4873 skip_engines:
4874 if (hard_reset) {
4875 gaudi2_disable_msix(hdev);
4876 return;
4877 }
4878
4879 gaudi2_sync_irqs(hdev);
4880 }
4881
static void gaudi2_init_firmware_preload_params(struct hl_device *hdev)
4883 {
4884 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
4885
4886 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS;
4887 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0;
4888 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1;
4889 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0;
4890 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1;
4891 pre_fw_load->wait_for_preboot_timeout = GAUDI2_PREBOOT_REQ_TIMEOUT_USEC;
4892 pre_fw_load->wait_for_preboot_extended_timeout =
4893 GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC;
4894 }
4895
static void gaudi2_init_firmware_loader(struct hl_device *hdev)
4897 {
4898 struct fw_load_mgr *fw_loader = &hdev->fw_loader;
4899 struct dynamic_fw_load_mgr *dynamic_loader;
4900 struct cpu_dyn_regs *dyn_regs;
4901
4902 /* fill common fields */
4903 fw_loader->fw_comp_loaded = FW_TYPE_NONE;
4904 fw_loader->boot_fit_img.image_name = GAUDI2_BOOT_FIT_FILE;
4905 fw_loader->linux_img.image_name = GAUDI2_LINUX_FW_FILE;
4906 fw_loader->boot_fit_timeout = GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC;
4907 fw_loader->skip_bmc = false;
4908 fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
4909 fw_loader->dram_bar_id = DRAM_BAR_ID;
4910 fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
4911
	/* Here we set initial values for a few specific dynamic registers
	 * (before reading the first descriptor from FW those values have to
	 * be hard-coded). In later stages of the protocol those values will
	 * be updated automatically by reading the FW descriptor, so the data
	 * there will always be up-to-date.
	 */
4918 dynamic_loader = &hdev->fw_loader.dynamic_loader;
4919 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs;
4920 dyn_regs->kmd_msg_to_cpu = cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU);
4921 dyn_regs->cpu_cmd_status_to_host = cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST);
4922 dynamic_loader->wait_for_bl_timeout = GAUDI2_WAIT_FOR_BL_TIMEOUT_USEC;
4923 }
4924
static int gaudi2_init_cpu(struct hl_device *hdev)
4926 {
4927 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4928 int rc;
4929
4930 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU))
4931 return 0;
4932
4933 if (gaudi2->hw_cap_initialized & HW_CAP_CPU)
4934 return 0;
4935
4936 rc = hl_fw_init_cpu(hdev);
4937 if (rc)
4938 return rc;
4939
4940 gaudi2->hw_cap_initialized |= HW_CAP_CPU;
4941
4942 return 0;
4943 }
4944
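/*
 * Hand the PQ/EQ/CQ base addresses and sizes to the device CPU, signal
 * readiness via the PI-update interrupt and wait for the CPU to report
 * PQ_INIT_STATUS_READY_FOR_HOST.
 */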
static int gaudi2_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
4946 {
4947 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
4948 struct asic_fixed_properties *prop = &hdev->asic_prop;
4949 struct gaudi2_device *gaudi2 = hdev->asic_specific;
4950 struct cpu_dyn_regs *dyn_regs;
4951 struct hl_eq *eq;
4952 u32 status;
4953 int err;
4954
4955 if (!hdev->cpu_queues_enable)
4956 return 0;
4957
4958 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
4959 return 0;
4960
4961 eq = &hdev->event_queue;
4962
4963 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
4964
4965 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
4966 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
4967
4968 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
4969 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
4970
4971 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, lower_32_bits(hdev->cpu_accessible_dma_address));
4972 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, upper_32_bits(hdev->cpu_accessible_dma_address));
4973
4974 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
4975 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
4976 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
4977
4978 /* Used for EQ CI */
4979 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
4980
4981 WREG32(mmCPU_IF_PF_PQ_PI, 0);
4982
4983 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
4984
4985 /* Let the ARC know we are ready as it is now handling those queues */
4986
4987 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
4988 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
4989
4990 err = hl_poll_timeout(
4991 hdev,
4992 mmCPU_IF_QUEUE_INIT,
4993 status,
4994 (status == PQ_INIT_STATUS_READY_FOR_HOST),
4995 1000,
4996 cpu_timeout);
4997
4998 if (err) {
4999 dev_err(hdev->dev, "Failed to communicate with device CPU (timeout)\n");
5000 return -EIO;
5001 }
5002
5003 /* update FW application security bits */
5004 if (prop->fw_cpu_boot_dev_sts0_valid)
5005 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0);
5006
5007 if (prop->fw_cpu_boot_dev_sts1_valid)
5008 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1);
5009
5010 gaudi2->hw_cap_initialized |= HW_CAP_CPU_Q;
5011 return 0;
5012 }
5013
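/*
 * Program the base address, size and producer/consumer indices for each PQ of
 * a QMAN. The PQ buffer may reside either in device memory (q->dram_bd) or in
 * host memory (q->bus_address).
 */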
static void gaudi2_init_qman_pq(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
5016 {
5017 struct hl_hw_queue *q;
5018 u32 pq_id, pq_offset;
5019
5020 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
5021 q = &hdev->kernel_queues[queue_id_base + pq_id];
5022 pq_offset = pq_id * 4;
5023
5024 if (q->dram_bd) {
5025 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
5026 lower_32_bits(q->pq_dram_address));
5027 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
5028 upper_32_bits(q->pq_dram_address));
5029 } else {
5030 WREG32(reg_base + QM_PQ_BASE_LO_0_OFFSET + pq_offset,
5031 lower_32_bits(q->bus_address));
5032 WREG32(reg_base + QM_PQ_BASE_HI_0_OFFSET + pq_offset,
5033 upper_32_bits(q->bus_address));
5034 }
5035 WREG32(reg_base + QM_PQ_SIZE_0_OFFSET + pq_offset, ilog2(HL_QUEUE_LENGTH));
5036 WREG32(reg_base + QM_PQ_PI_0_OFFSET + pq_offset, 0);
5037 WREG32(reg_base + QM_PQ_CI_0_OFFSET + pq_offset, 0);
5038 }
5039 }
5040
static void gaudi2_init_qman_cp(struct hl_device *hdev, u32 reg_base)
5042 {
5043 u32 cp_id, cp_offset, mtr_base_lo, mtr_base_hi, so_base_lo, so_base_hi;
5044
5045 mtr_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
5046 mtr_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
5047 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5048 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5049
5050 for (cp_id = 0 ; cp_id < NUM_OF_CP_PER_QMAN; cp_id++) {
5051 cp_offset = cp_id * 4;
5052
5053 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_LO_0_OFFSET + cp_offset, mtr_base_lo);
5054 WREG32(reg_base + QM_CP_MSG_BASE0_ADDR_HI_0_OFFSET + cp_offset, mtr_base_hi);
5055 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_LO_0_OFFSET + cp_offset, so_base_lo);
5056 WREG32(reg_base + QM_CP_MSG_BASE1_ADDR_HI_0_OFFSET + cp_offset, so_base_hi);
5057 }
5058
5059 /* allow QMANs to accept work from ARC CQF */
5060 WREG32(reg_base + QM_CP_CFG_OFFSET, FIELD_PREP(PDMA0_QM_CP_CFG_SWITCH_EN_MASK, 0x1));
5061 }
5062
static void gaudi2_init_qman_pqc(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
5065 {
5066 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5067 u32 pq_id, pq_offset, so_base_lo, so_base_hi;
5068
5069 so_base_lo = lower_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5070 so_base_hi = upper_32_bits(CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0);
5071
5072 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++) {
5073 pq_offset = pq_id * 4;
5074
		/* Route the QMAN PQC HBW writes to the scratchpad, as they are not needed */
5076 WREG32(reg_base + QM_PQC_HBW_BASE_LO_0_OFFSET + pq_offset,
5077 lower_32_bits(gaudi2->scratchpad_bus_address));
5078 WREG32(reg_base + QM_PQC_HBW_BASE_HI_0_OFFSET + pq_offset,
5079 upper_32_bits(gaudi2->scratchpad_bus_address));
5080 WREG32(reg_base + QM_PQC_SIZE_0_OFFSET + pq_offset,
5081 ilog2(PAGE_SIZE / sizeof(struct hl_cq_entry)));
5082
5083 WREG32(reg_base + QM_PQC_PI_0_OFFSET + pq_offset, 0);
5084 WREG32(reg_base + QM_PQC_LBW_WDATA_0_OFFSET + pq_offset, QM_PQC_LBW_WDATA);
5085 WREG32(reg_base + QM_PQC_LBW_BASE_LO_0_OFFSET + pq_offset, so_base_lo);
5086 WREG32(reg_base + QM_PQC_LBW_BASE_HI_0_OFFSET + pq_offset, so_base_hi);
5087 }
5088
5089 /* Enable QMAN H/W completion */
5090 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
5091 }
5092
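/*
 * Return the GIC QMAN interrupt-control dynamic register that corresponds to
 * the given queue. The caller programs it as the QMAN error-message address.
 */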
static u32 gaudi2_get_dyn_sp_reg(struct hl_device *hdev, u32 queue_id_base)
5094 {
5095 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5096 u32 sp_reg_addr;
5097
5098 switch (queue_id_base) {
5099 case GAUDI2_QUEUE_ID_PDMA_0_0...GAUDI2_QUEUE_ID_PDMA_1_3:
5100 fallthrough;
5101 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
5102 fallthrough;
5103 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
5104 fallthrough;
5105 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
5106 fallthrough;
5107 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
5108 sp_reg_addr = le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl);
5109 break;
5110 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0...GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
5111 fallthrough;
5112 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0...GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
5113 fallthrough;
5114 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0...GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
5115 fallthrough;
5116 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0...GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
5117 sp_reg_addr = le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl);
5118 break;
5119 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
5120 fallthrough;
5121 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
5122 fallthrough;
5123 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
5124 fallthrough;
5125 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
5126 sp_reg_addr = le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl);
5127 break;
5128 case GAUDI2_QUEUE_ID_ROT_0_0...GAUDI2_QUEUE_ID_ROT_1_3:
5129 sp_reg_addr = le32_to_cpu(dyn_regs->gic_rot_qm_irq_ctrl);
5130 break;
5131 case GAUDI2_QUEUE_ID_NIC_0_0...GAUDI2_QUEUE_ID_NIC_23_3:
5132 sp_reg_addr = le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl);
5133 break;
5134 default:
5135 dev_err(hdev->dev, "Unexpected h/w queue %d\n", queue_id_base);
5136 return 0;
5137 }
5138
5139 return sp_reg_addr;
5140 }
5141
static void gaudi2_init_qman_common(struct hl_device *hdev, u32 reg_base,
					u32 queue_id_base)
5144 {
5145 u32 glbl_prot = QMAN_MAKE_TRUSTED, irq_handler_offset;
5146 int map_table_entry;
5147
5148 WREG32(reg_base + QM_GLBL_PROT_OFFSET, glbl_prot);
5149
5150 irq_handler_offset = gaudi2_get_dyn_sp_reg(hdev, queue_id_base);
5151 WREG32(reg_base + QM_GLBL_ERR_ADDR_LO_OFFSET, lower_32_bits(CFG_BASE + irq_handler_offset));
5152 WREG32(reg_base + QM_GLBL_ERR_ADDR_HI_OFFSET, upper_32_bits(CFG_BASE + irq_handler_offset));
5153
5154 map_table_entry = gaudi2_qman_async_event_id[queue_id_base];
5155 WREG32(reg_base + QM_GLBL_ERR_WDATA_OFFSET,
5156 gaudi2_irq_map_table[map_table_entry].cpu_id);
5157
5158 WREG32(reg_base + QM_ARB_ERR_MSG_EN_OFFSET, QM_ARB_ERR_MSG_EN_MASK);
5159
5160 WREG32(reg_base + QM_ARB_SLV_CHOISE_WDT_OFFSET, GAUDI2_ARB_WDT_TIMEOUT);
5161 WREG32(reg_base + QM_GLBL_CFG1_OFFSET, 0);
5162 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0);
5163
5164 /* Enable the QMAN channel.
	 * PDMA QMAN configuration is different, as we do not allow the user to
	 * access some of the CPs.
5167 * PDMA0: CP2/3 are reserved for the ARC usage.
5168 * PDMA1: CP1/2/3 are reserved for the ARC usage.
5169 */
5170 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0])
5171 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE);
5172 else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0])
5173 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE);
5174 else
5175 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE);
5176 }
5177
static void gaudi2_init_qman(struct hl_device *hdev, u32 reg_base,
				u32 queue_id_base)
5180 {
5181 u32 pq_id;
5182
5183 for (pq_id = 0 ; pq_id < NUM_OF_PQ_PER_QMAN ; pq_id++)
5184 hdev->kernel_queues[queue_id_base + pq_id].cq_id = GAUDI2_RESERVED_CQ_CS_COMPLETION;
5185
5186 gaudi2_init_qman_pq(hdev, reg_base, queue_id_base);
5187 gaudi2_init_qman_cp(hdev, reg_base);
5188 gaudi2_init_qman_pqc(hdev, reg_base, queue_id_base);
5189 gaudi2_init_qman_common(hdev, reg_base, queue_id_base);
5190 }
5191
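/*
 * Common DMA core init: set the protection bits, route error messages to the
 * GIC DMA-core interrupt-control register and enable the channel.
 */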
static void gaudi2_init_dma_core(struct hl_device *hdev, u32 reg_base,
				u32 dma_core_id, bool is_secure)
5194 {
5195 u32 prot, irq_handler_offset;
5196 struct cpu_dyn_regs *dyn_regs;
5197 int map_table_entry;
5198
5199 prot = 1 << ARC_FARM_KDMA_PROT_ERR_VAL_SHIFT;
5200 if (is_secure)
5201 prot |= 1 << ARC_FARM_KDMA_PROT_VAL_SHIFT;
5202
5203 WREG32(reg_base + DMA_CORE_PROT_OFFSET, prot);
5204
5205 dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
5206 irq_handler_offset = le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl);
5207
5208 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_LO_OFFSET,
5209 lower_32_bits(CFG_BASE + irq_handler_offset));
5210
5211 WREG32(reg_base + DMA_CORE_ERRMSG_ADDR_HI_OFFSET,
5212 upper_32_bits(CFG_BASE + irq_handler_offset));
5213
5214 map_table_entry = gaudi2_dma_core_async_event_id[dma_core_id];
5215 WREG32(reg_base + DMA_CORE_ERRMSG_WDATA_OFFSET,
5216 gaudi2_irq_map_table[map_table_entry].cpu_id);
5217
5218 /* Enable the DMA channel */
5219 WREG32(reg_base + DMA_CORE_CFG_0_OFFSET, 1 << ARC_FARM_KDMA_CFG_0_EN_SHIFT);
5220 }
5221
static void gaudi2_init_kdma(struct hl_device *hdev)
5223 {
5224 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5225 u32 reg_base;
5226
5227 if ((gaudi2->hw_cap_initialized & HW_CAP_KDMA) == HW_CAP_KDMA)
5228 return;
5229
5230 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_KDMA];
5231
5232 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_KDMA, true);
5233
5234 gaudi2->hw_cap_initialized |= HW_CAP_KDMA;
5235 }
5236
static void gaudi2_init_pdma(struct hl_device *hdev)
5238 {
5239 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5240 u32 reg_base;
5241
5242 if ((gaudi2->hw_cap_initialized & HW_CAP_PDMA_MASK) == HW_CAP_PDMA_MASK)
5243 return;
5244
5245 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA0];
5246 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA0, false);
5247
5248 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0];
5249 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_0_0);
5250
5251 reg_base = gaudi2_dma_core_blocks_bases[DMA_CORE_ID_PDMA1];
5252 gaudi2_init_dma_core(hdev, reg_base, DMA_CORE_ID_PDMA1, false);
5253
5254 reg_base = gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0];
5255 gaudi2_init_qman(hdev, reg_base, GAUDI2_QUEUE_ID_PDMA_1_0);
5256
5257 gaudi2->hw_cap_initialized |= HW_CAP_PDMA_MASK;
5258 }
5259
static void gaudi2_init_edma_instance(struct hl_device *hdev, u8 seq)
5261 {
5262 u32 reg_base, base_edma_core_id, base_edma_qman_id;
5263
5264 base_edma_core_id = DMA_CORE_ID_EDMA0 + seq;
5265 base_edma_qman_id = edma_stream_base[seq];
5266
5267 reg_base = gaudi2_dma_core_blocks_bases[base_edma_core_id];
5268 gaudi2_init_dma_core(hdev, reg_base, base_edma_core_id, false);
5269
5270 reg_base = gaudi2_qm_blocks_bases[base_edma_qman_id];
5271 gaudi2_init_qman(hdev, reg_base, base_edma_qman_id);
5272 }
5273
static void gaudi2_init_edma(struct hl_device *hdev)
5275 {
5276 struct asic_fixed_properties *prop = &hdev->asic_prop;
5277 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5278 int dcore, inst;
5279
5280 if ((gaudi2->hw_cap_initialized & HW_CAP_EDMA_MASK) == HW_CAP_EDMA_MASK)
5281 return;
5282
5283 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
5284 for (inst = 0 ; inst < NUM_OF_EDMA_PER_DCORE ; inst++) {
5285 u8 seq = dcore * NUM_OF_EDMA_PER_DCORE + inst;
5286
5287 if (!(prop->edma_enabled_mask & BIT(seq)))
5288 continue;
5289
5290 gaudi2_init_edma_instance(hdev, seq);
5291
5292 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_EDMA_SHIFT + seq);
5293 }
5294 }
5295 }
5296
5297 /*
5298 * gaudi2_arm_monitors_for_virt_msix_db() - Arm monitors for writing to the virtual MSI-X doorbell.
5299 * @hdev: pointer to habanalabs device structure.
5300 * @sob_id: sync object ID.
5301 * @first_mon_id: ID of first monitor out of 3 consecutive monitors.
5302 * @interrupt_id: interrupt ID.
5303 *
5304 * Some initiators cannot have HBW address in their completion address registers, and thus cannot
5305 * write directly to the HBW host memory of the virtual MSI-X doorbell.
5306 * Instead, they are configured to LBW write to a sync object, and a monitor will do the HBW write.
5307 *
5308 * The mechanism in the sync manager block is composed of a master monitor with 3 messages.
5309 * In addition to the HBW write, the other 2 messages are for preparing the monitor to next
5310 * completion, by decrementing the sync object value and re-arming the monitor.
5311 */
static void gaudi2_arm_monitors_for_virt_msix_db(struct hl_device *hdev, u32 sob_id,
							u32 first_mon_id, u32 interrupt_id)
5314 {
5315 u32 sob_offset, first_mon_offset, mon_offset, payload, sob_group, mode, arm, config;
5316 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5317 u64 addr;
5318 u8 mask;
5319
5320 /* Reset the SOB value */
5321 sob_offset = sob_id * sizeof(u32);
5322 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
5323
5324 /* Configure 3 monitors:
5325 * 1. Write interrupt ID to the virtual MSI-X doorbell (master monitor)
5326 * 2. Decrement SOB value by 1.
5327 * 3. Re-arm the master monitor.
5328 */
5329
5330 first_mon_offset = first_mon_id * sizeof(u32);
5331
5332 /* 2nd monitor: Decrement SOB value by 1 */
5333 mon_offset = first_mon_offset + sizeof(u32);
5334
5335 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
5336 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5337 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5338
5339 payload = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 0x7FFF) | /* "-1" */
5340 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK, 1) |
5341 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1);
5342 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5343
5344 /* 3rd monitor: Re-arm the master monitor */
5345 mon_offset = first_mon_offset + 2 * sizeof(u32);
5346
5347 addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + first_mon_offset;
5348 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5349 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5350
5351 sob_group = sob_id / 8;
5352 mask = ~BIT(sob_id & 0x7);
5353 mode = 0; /* comparison mode is "greater than or equal to" */
5354 arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sob_group) |
5355 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask) |
5356 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode) |
5357 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, 1);
5358
5359 payload = arm;
5360 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5361
5362 /* 1st monitor (master): Write interrupt ID to the virtual MSI-X doorbell */
5363 mon_offset = first_mon_offset;
5364
5365 config = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_WR_NUM_MASK, 2); /* "2": 3 writes */
5366 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + mon_offset, config);
5367
5368 addr = gaudi2->virt_msix_db_dma_addr;
5369 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, lower_32_bits(addr));
5370 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_offset, upper_32_bits(addr));
5371
5372 payload = interrupt_id;
5373 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, payload);
5374
5375 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, arm);
5376 }
5377
static void gaudi2_prepare_sm_for_virt_msix_db(struct hl_device *hdev)
5379 {
5380 u32 decoder_id, sob_id, first_mon_id, interrupt_id;
5381 struct asic_fixed_properties *prop = &hdev->asic_prop;
5382
5383 /* Decoder normal/abnormal interrupts */
5384 for (decoder_id = 0 ; decoder_id < NUMBER_OF_DEC ; ++decoder_id) {
5385 if (!(prop->decoder_enabled_mask & BIT(decoder_id)))
5386 continue;
5387
5388 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5389 first_mon_id = GAUDI2_RESERVED_MON_DEC_NRM_FIRST + 3 * decoder_id;
5390 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
5391 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5392
5393 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5394 first_mon_id = GAUDI2_RESERVED_MON_DEC_ABNRM_FIRST + 3 * decoder_id;
5395 interrupt_id += 1;
5396 gaudi2_arm_monitors_for_virt_msix_db(hdev, sob_id, first_mon_id, interrupt_id);
5397 }
5398 }
5399
static void gaudi2_init_sm(struct hl_device *hdev)
5401 {
5402 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5403 u64 cq_address;
5404 u32 reg_val;
5405 int i;
5406
5407 /* Enable HBW/LBW CQ for completion monitors */
5408 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5409 reg_val |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_LBW_EN_MASK, 1);
5410
5411 for (i = 0 ; i < GAUDI2_MAX_PENDING_CS ; i++)
5412 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5413
5414 /* Enable only HBW CQ for KDMA completion monitor */
5415 reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
5416 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
5417
5418 /* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
5419 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
5420 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
5421 WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
5422
5423 for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
5424 cq_address =
5425 hdev->completion_queue[i].bus_address;
5426
5427 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + (4 * i),
5428 lower_32_bits(cq_address));
5429 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + (4 * i),
5430 upper_32_bits(cq_address));
5431 WREG32(mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + (4 * i),
5432 ilog2(HL_CQ_SIZE_IN_BYTES));
5433 }
5434
	/* Configure kernel ASID and MMU BP */
5436 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_SEC, 0x10000);
5437 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV, 0);
5438
5439 /* Initialize sync objects and monitors which are used for the virtual MSI-X doorbell */
5440 gaudi2_prepare_sm_for_virt_msix_db(hdev);
5441 }
5442
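/*
 * Configure the MME accumulator: set the accumulator interrupt mask (WBC
 * error response vs. the AP source/result INF/NaN indications) and program
 * the LFSR polynomial and per-seed random values.
 */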
static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
5444 {
5445 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5446 u32 reg_val;
5447 int i;
5448
5449 reg_val = FIELD_PREP(MME_ACC_INTR_MASK_WBC_ERR_RESP_MASK, 0);
5450 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_POS_INF_MASK, 1);
5451 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NEG_INF_MASK, 1);
5452 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_SRC_NAN_MASK, 1);
5453 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_POS_INF_MASK, 1);
5454 reg_val |= FIELD_PREP(MME_ACC_INTR_MASK_AP_RESULT_NEG_INF_MASK, 1);
5455
5456 WREG32(reg_base + MME_ACC_INTR_MASK_OFFSET, reg_val);
5457 WREG32(reg_base + MME_ACC_AP_LFSR_POLY_OFFSET, 0x80DEADAF);
5458
5459 for (i = 0 ; i < MME_NUM_OF_LFSR_SEEDS ; i++) {
5460 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_SEL_OFFSET, i);
5461 WREG32(reg_base + MME_ACC_AP_LFSR_SEED_WDATA_OFFSET, gaudi2->lfsr_rand_seeds[i]);
5462 }
5463 }
5464
static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
					bool config_qman_only)
5467 {
5468 u32 queue_id_base, reg_base;
5469
5470 switch (dcore_id) {
5471 case 0:
5472 queue_id_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
5473 break;
5474 case 1:
5475 queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
5476 break;
5477 case 2:
5478 queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
5479 break;
5480 case 3:
5481 queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
5482 break;
5483 default:
5484 dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
5485 return;
5486 }
5487
5488 if (!config_qman_only) {
5489 reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
5490 gaudi2_init_mme_acc(hdev, reg_base);
5491 }
5492
5493 reg_base = gaudi2_qm_blocks_bases[queue_id_base];
5494 gaudi2_init_qman(hdev, reg_base, queue_id_base);
5495 }
5496
static void gaudi2_init_mme(struct hl_device *hdev)
5498 {
5499 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5500 int i;
5501
5502 if ((gaudi2->hw_cap_initialized & HW_CAP_MME_MASK) == HW_CAP_MME_MASK)
5503 return;
5504
5505 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
5506 gaudi2_init_dcore_mme(hdev, i, false);
5507
5508 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_MME_SHIFT + i);
5509 }
5510 }
5511
5512 static void gaudi2_init_tpc_cfg(struct hl_device *hdev, u32 reg_base)
5513 {
5514 /* Mask arithmetic and QM interrupts in TPC */
5515 WREG32(reg_base + TPC_CFG_TPC_INTR_MASK_OFFSET, 0x23FFFE);
5516
5517 /* Set 16 cache lines */
5518 WREG32(reg_base + TPC_CFG_MSS_CONFIG_OFFSET,
5519 2 << DCORE0_TPC0_CFG_MSS_CONFIG_ICACHE_FETCH_LINE_NUM_SHIFT);
5520 }
5521
5522 struct gaudi2_tpc_init_cfg_data {
5523 enum gaudi2_queue_id dcore_tpc_qid_base[NUM_OF_DCORES];
5524 };
5525
5526 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst,
5527 u32 offset, struct iterate_module_ctx *ctx)
5528 {
5529 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5530 struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data;
5531 u32 queue_id_base;
5532 u8 seq;
5533
5534 queue_id_base = cfg_data->dcore_tpc_qid_base[dcore] + (inst * NUM_OF_PQ_PER_QMAN);
5535
5536 if (dcore == 0 && inst == (NUM_DCORE0_TPC - 1))
5537 		/* the additional TPC in DCORE0 gets the last sequence number */
5538 seq = NUM_OF_DCORES * NUM_OF_TPC_PER_DCORE;
5539 else
5540 seq = dcore * NUM_OF_TPC_PER_DCORE + inst;
5541
5542 gaudi2_init_tpc_cfg(hdev, mmDCORE0_TPC0_CFG_BASE + offset);
5543 gaudi2_init_qman(hdev, mmDCORE0_TPC0_QM_BASE + offset, queue_id_base);
5544
5545 gaudi2->tpc_hw_cap_initialized |= BIT_ULL(HW_CAP_TPC_SHIFT + seq);
5546 }
5547
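/*
 * Initialize all enabled TPC engines using the common TPC iterator: each
 * instance gets its CFG block and QMAN configured, and its bit is set in
 * tpc_hw_cap_initialized.
 */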
5548 static void gaudi2_init_tpc(struct hl_device *hdev)
5549 {
5550 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5551 struct gaudi2_tpc_init_cfg_data init_cfg_data;
5552 struct iterate_module_ctx tpc_iter;
5553
5554 if (!hdev->asic_prop.tpc_enabled_mask)
5555 return;
5556
5557 if ((gaudi2->tpc_hw_cap_initialized & HW_CAP_TPC_MASK) == HW_CAP_TPC_MASK)
5558 return;
5559
5560 init_cfg_data.dcore_tpc_qid_base[0] = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0;
5561 init_cfg_data.dcore_tpc_qid_base[1] = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0;
5562 init_cfg_data.dcore_tpc_qid_base[2] = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0;
5563 init_cfg_data.dcore_tpc_qid_base[3] = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0;
5564 tpc_iter.fn = &gaudi2_init_tpc_config;
5565 tpc_iter.data = &init_cfg_data;
5566 gaudi2_iterate_tpcs(hdev, &tpc_iter);
5567 }
5568
5569 static void gaudi2_init_rotator(struct hl_device *hdev)
5570 {
5571 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5572 u32 i, reg_base, queue_id;
5573
5574 queue_id = GAUDI2_QUEUE_ID_ROT_0_0;
5575
5576 for (i = 0 ; i < NUM_OF_ROT ; i++, queue_id += NUM_OF_PQ_PER_QMAN) {
5577 reg_base = gaudi2_qm_blocks_bases[queue_id];
5578 gaudi2_init_qman(hdev, reg_base, queue_id);
5579
5580 gaudi2->hw_cap_initialized |= BIT_ULL(HW_CAP_ROT_SHIFT + i);
5581 }
5582 }
5583
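/*
 * Configure the decoder bridge control block so that the VCMD normal and
 * abnormal interrupts increment their reserved sync objects in the DCORE0
 * sync manager.
 */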
5584 static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u32 decoder_id)
5585 {
5586 u32 sob_id;
5587
5588 /* VCMD normal interrupt */
5589 sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
5590 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
5591 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5592 WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5593
5594 /* VCMD abnormal interrupt */
5595 sob_id = GAUDI2_RESERVED_SOB_DEC_ABNRM_FIRST + decoder_id;
5596 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR,
5597 mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_id * sizeof(u32));
5598 WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, GAUDI2_SOB_INCREMENT_BY_ONE);
5599 }
5600
5601 static void gaudi2_init_dec(struct hl_device *hdev)
5602 {
5603 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5604 u32 dcore_id, dec_id, dec_bit;
5605 u64 base_addr;
5606
5607 if (!hdev->asic_prop.decoder_enabled_mask)
5608 return;
5609
5610 if ((gaudi2->dec_hw_cap_initialized & HW_CAP_DEC_MASK) == HW_CAP_DEC_MASK)
5611 return;
5612
5613 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
5614 for (dec_id = 0 ; dec_id < NUM_OF_DEC_PER_DCORE ; dec_id++) {
5615 dec_bit = dcore_id * NUM_OF_DEC_PER_DCORE + dec_id;
5616
5617 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5618 continue;
5619
5620 base_addr = mmDCORE0_DEC0_CMD_BASE +
5621 BRDG_CTRL_BLOCK_OFFSET +
5622 dcore_id * DCORE_OFFSET +
5623 dec_id * DCORE_VDEC_OFFSET;
5624
5625 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5626
5627 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5628 }
5629
5630 for (dec_id = 0 ; dec_id < NUM_OF_PCIE_VDEC ; dec_id++) {
5631 dec_bit = PCIE_DEC_SHIFT + dec_id;
5632 if (!(hdev->asic_prop.decoder_enabled_mask & BIT(dec_bit)))
5633 continue;
5634
5635 base_addr = mmPCIE_DEC0_CMD_BASE + BRDG_CTRL_BLOCK_OFFSET +
5636 dec_id * DCORE_VDEC_OFFSET;
5637
5638 gaudi2_init_vdec_brdg_ctrl(hdev, base_addr, dec_bit);
5639
5640 gaudi2->dec_hw_cap_initialized |= BIT_ULL(HW_CAP_DEC_SHIFT + dec_bit);
5641 }
5642 }
5643
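/*
 * Program the hop-0 page-table physical address of a single ASID in the
 * given STLB and wait for the h/w to clear the busy bit as an ack.
 */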
5644 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
5645 u32 stlb_base, u32 asid, u64 phys_addr)
5646 {
5647 u32 status, timeout_usec;
5648 int rc;
5649
5650 if (hdev->pldm || !hdev->pdev)
5651 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5652 else
5653 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
5654
5655 WREG32(stlb_base + STLB_ASID_OFFSET, asid);
5656 WREG32(stlb_base + STLB_HOP0_PA43_12_OFFSET, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
5657 WREG32(stlb_base + STLB_HOP0_PA63_44_OFFSET, phys_addr >> MMU_HOP0_PA63_44_SHIFT);
5658 WREG32(stlb_base + STLB_BUSY_OFFSET, 0x80000000);
5659
5660 rc = hl_poll_timeout(
5661 hdev,
5662 stlb_base + STLB_BUSY_OFFSET,
5663 status,
5664 !(status & 0x80000000),
5665 1000,
5666 timeout_usec);
5667
5668 if (rc) {
5669 dev_err(hdev->dev, "Timeout during MMU hop0 config of asid %d\n", asid);
5670 return rc;
5671 }
5672
5673 return 0;
5674 }
5675
5676 static void gaudi2_mmu_send_invalidate_cache_cmd(struct hl_device *hdev, u32 stlb_base,
5677 u32 start_offset, u32 inv_start_val,
5678 u32 flags)
5679 {
5680 /* clear PMMU mem line cache (only needed in mmu range invalidation) */
5681 if (flags & MMU_OP_CLEAR_MEMCACHE)
5682 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INVALIDATION, 0x1);
5683
5684 if (flags & MMU_OP_SKIP_LOW_CACHE_INV)
5685 return;
5686
5687 WREG32(stlb_base + start_offset, inv_start_val);
5688 }
5689
5690 static int gaudi2_mmu_invalidate_cache_status_poll(struct hl_device *hdev, u32 stlb_base,
5691 struct gaudi2_cache_invld_params *inv_params)
5692 {
5693 u32 status, timeout_usec, start_offset;
5694 int rc;
5695
5696 timeout_usec = (hdev->pldm) ? GAUDI2_PLDM_MMU_TIMEOUT_USEC :
5697 GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5698
5699 /* poll PMMU mem line cache (only needed in mmu range invalidation) */
5700 if (inv_params->flags & MMU_OP_CLEAR_MEMCACHE) {
5701 rc = hl_poll_timeout(
5702 hdev,
5703 mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS,
5704 status,
5705 status & 0x1,
5706 1000,
5707 timeout_usec);
5708
5709 if (rc)
5710 return rc;
5711
5712 /* Need to manually reset the status to 0 */
5713 WREG32(mmPMMU_HBW_STLB_MEM_CACHE_INV_STATUS, 0x0);
5714 }
5715
5716 /* Lower cache does not work with cache lines, hence we can skip its
5717 * invalidation upon map and invalidate only upon unmap
5718 */
5719 if (inv_params->flags & MMU_OP_SKIP_LOW_CACHE_INV)
5720 return 0;
5721
5722 start_offset = inv_params->range_invalidation ?
5723 STLB_RANGE_CACHE_INVALIDATION_OFFSET : STLB_INV_ALL_START_OFFSET;
5724
5725 rc = hl_poll_timeout(
5726 hdev,
5727 stlb_base + start_offset,
5728 status,
5729 !(status & 0x1),
5730 1000,
5731 timeout_usec);
5732
5733 return rc;
5734 }
5735
5736 bool gaudi2_is_hmmu_enabled(struct hl_device *hdev, int dcore_id, int hmmu_id)
5737 {
5738 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5739 u32 hw_cap;
5740
5741 hw_cap = HW_CAP_DCORE0_DMMU0 << (NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id);
5742
5743 if (gaudi2->hw_cap_initialized & hw_cap)
5744 return true;
5745
5746 return false;
5747 }
5748
5749 /* this function shall be called only for HMMUs for which capability bit is set */
5750 static inline u32 get_hmmu_stlb_base(int dcore_id, int hmmu_id)
5751 {
5752 u32 offset;
5753
5754 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
5755 return (u32)(mmDCORE0_HMMU0_STLB_BASE + offset);
5756 }
5757
5758 static void gaudi2_mmu_invalidate_cache_trigger(struct hl_device *hdev, u32 stlb_base,
5759 struct gaudi2_cache_invld_params *inv_params)
5760 {
5761 u32 start_offset;
5762
5763 if (inv_params->range_invalidation) {
5764 		/* Set the addresses range.
5765 		 * Note: by design, the start address we set in the register is not
5766 		 * included in the invalidation range.
5767 		 * That's why we set an address lower than the one we actually want
5768 		 * to be included in the range invalidation.
5769 		 */
5770 u64 start = inv_params->start_va - 1;
5771
5772 start_offset = STLB_RANGE_CACHE_INVALIDATION_OFFSET;
5773
5774 WREG32(stlb_base + STLB_RANGE_INV_START_LSB_OFFSET,
5775 start >> MMU_RANGE_INV_VA_LSB_SHIFT);
5776
5777 WREG32(stlb_base + STLB_RANGE_INV_START_MSB_OFFSET,
5778 start >> MMU_RANGE_INV_VA_MSB_SHIFT);
5779
5780 WREG32(stlb_base + STLB_RANGE_INV_END_LSB_OFFSET,
5781 inv_params->end_va >> MMU_RANGE_INV_VA_LSB_SHIFT);
5782
5783 WREG32(stlb_base + STLB_RANGE_INV_END_MSB_OFFSET,
5784 inv_params->end_va >> MMU_RANGE_INV_VA_MSB_SHIFT);
5785 } else {
5786 start_offset = STLB_INV_ALL_START_OFFSET;
5787 }
5788
5789 gaudi2_mmu_send_invalidate_cache_cmd(hdev, stlb_base, start_offset,
5790 inv_params->inv_start_val, inv_params->flags);
5791 }
5792
5793 static inline void gaudi2_hmmu_invalidate_cache_trigger(struct hl_device *hdev,
5794 int dcore_id, int hmmu_id,
5795 struct gaudi2_cache_invld_params *inv_params)
5796 {
5797 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5798
5799 gaudi2_mmu_invalidate_cache_trigger(hdev, stlb_base, inv_params);
5800 }
5801
5802 static inline int gaudi2_hmmu_invalidate_cache_status_poll(struct hl_device *hdev,
5803 int dcore_id, int hmmu_id,
5804 struct gaudi2_cache_invld_params *inv_params)
5805 {
5806 u32 stlb_base = get_hmmu_stlb_base(dcore_id, hmmu_id);
5807
5808 return gaudi2_mmu_invalidate_cache_status_poll(hdev, stlb_base, inv_params);
5809 }
5810
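/*
 * Invalidate the cache of all enabled HMMUs in two phases: first trigger
 * the invalidation on every unit, then poll each one for completion.
 */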
5811 static int gaudi2_hmmus_invalidate_cache(struct hl_device *hdev,
5812 struct gaudi2_cache_invld_params *inv_params)
5813 {
5814 int dcore_id, hmmu_id;
5815
5816 /* first send all invalidation commands */
5817 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5818 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5819 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5820 continue;
5821
5822 gaudi2_hmmu_invalidate_cache_trigger(hdev, dcore_id, hmmu_id, inv_params);
5823 }
5824 }
5825
5826 /* next, poll all invalidations status */
5827 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
5828 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE ; hmmu_id++) {
5829 int rc;
5830
5831 if (!gaudi2_is_hmmu_enabled(hdev, dcore_id, hmmu_id))
5832 continue;
5833
5834 rc = gaudi2_hmmu_invalidate_cache_status_poll(hdev, dcore_id, hmmu_id,
5835 inv_params);
5836 if (rc)
5837 return rc;
5838 }
5839 }
5840
5841 return 0;
5842 }
5843
5844 static int gaudi2_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags)
5845 {
5846 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5847 struct gaudi2_cache_invld_params invld_params;
5848 int rc = 0;
5849
5850 if (hdev->reset_info.hard_reset_pending)
5851 return rc;
5852
5853 invld_params.range_invalidation = false;
5854 invld_params.inv_start_val = 1;
5855
5856 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5857 invld_params.flags = flags;
5858 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5859 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5860 &invld_params);
5861 } else if (flags & MMU_OP_PHYS_PACK) {
5862 invld_params.flags = 0;
5863 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5864 }
5865
5866 return rc;
5867 }
5868
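/*
 * Range-scoped cache invalidation. For the PMMU, only the [va, va + size)
 * range of the given ASID is invalidated, with a fallback to a full
 * invalidation when the range starts at address zero. For the HMMUs, the
 * invalidation is triggered on all enabled units.
 */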
5869 static int gaudi2_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
5870 u32 flags, u32 asid, u64 va, u64 size)
5871 {
5872 struct gaudi2_cache_invld_params invld_params = {0};
5873 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5874 u64 start_va, end_va;
5875 u32 inv_start_val;
5876 int rc = 0;
5877
5878 if (hdev->reset_info.hard_reset_pending)
5879 return 0;
5880
5881 inv_start_val = (1 << MMU_RANGE_INV_EN_SHIFT |
5882 1 << MMU_RANGE_INV_ASID_EN_SHIFT |
5883 asid << MMU_RANGE_INV_ASID_SHIFT);
5884 start_va = va;
5885 end_va = start_va + size;
5886
5887 if ((flags & MMU_OP_USERPTR) && (gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
5888 /* As range invalidation does not support zero address we will
5889 * do full invalidation in this case
5890 */
5891 if (start_va) {
5892 invld_params.range_invalidation = true;
5893 invld_params.start_va = start_va;
5894 invld_params.end_va = end_va;
5895 invld_params.inv_start_val = inv_start_val;
5896 invld_params.flags = flags | MMU_OP_CLEAR_MEMCACHE;
5897 } else {
5898 invld_params.range_invalidation = false;
5899 invld_params.inv_start_val = 1;
5900 invld_params.flags = flags;
5901 }
5902
5904 gaudi2_mmu_invalidate_cache_trigger(hdev, mmPMMU_HBW_STLB_BASE, &invld_params);
5905 rc = gaudi2_mmu_invalidate_cache_status_poll(hdev, mmPMMU_HBW_STLB_BASE,
5906 &invld_params);
5907 if (rc)
5908 return rc;
5909
5910 } else if (flags & MMU_OP_PHYS_PACK) {
5911 invld_params.start_va = gaudi2_mmu_scramble_addr(hdev, start_va);
5912 invld_params.end_va = gaudi2_mmu_scramble_addr(hdev, end_va);
5913 invld_params.inv_start_val = inv_start_val;
5914 invld_params.flags = flags;
5915 rc = gaudi2_hmmus_invalidate_cache(hdev, &invld_params);
5916 }
5917
5918 return rc;
5919 }
5920
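/*
 * Program the hop-0 page-table address of every ASID, taken either from the
 * host-resident page tables or from the device page-table pool, according
 * to the MMU configuration.
 */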
5921 static int gaudi2_mmu_update_hop0_addr(struct hl_device *hdev, u32 stlb_base,
5922 bool host_resident_pgt)
5923 {
5924 struct asic_fixed_properties *prop = &hdev->asic_prop;
5925 u64 hop0_addr;
5926 u32 asid, max_asid = prop->max_asid;
5927 int rc;
5928
5929 /* it takes too much time to init all of the ASIDs on palladium */
5930 if (hdev->pldm)
5931 max_asid = min((u32) 8, max_asid);
5932
5933 for (asid = 0 ; asid < max_asid ; asid++) {
5934 if (host_resident_pgt)
5935 hop0_addr = hdev->mmu_priv.hr.mmu_asid_hop0[asid].phys_addr;
5936 else
5937 hop0_addr = prop->mmu_pgt_addr + (asid * prop->dmmu.hop_table_size);
5938
5939 rc = gaudi2_mmu_update_asid_hop0_addr(hdev, stlb_base, asid, hop0_addr);
5940 if (rc) {
5941 dev_err(hdev->dev, "failed to set hop0 addr for asid %d\n", asid);
5942 return rc;
5943 }
5944 }
5945
5946 return 0;
5947 }
5948
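/*
 * Common MMU bring-up sequence: trigger an invalidate-all, wait for the
 * STLB SRAM init to complete, program the hop-0 addresses of all ASIDs,
 * disable MMU bypass, wait for the invalidation to finish and finally
 * enable the MMU.
 */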
5949 static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base,
5950 bool host_resident_pgt)
5951 {
5952 u32 status, timeout_usec;
5953 int rc;
5954
5955 if (hdev->pldm || !hdev->pdev)
5956 timeout_usec = GAUDI2_PLDM_MMU_TIMEOUT_USEC;
5957 else
5958 timeout_usec = GAUDI2_MMU_CACHE_INV_TIMEOUT_USEC;
5959
5960 WREG32(stlb_base + STLB_INV_ALL_START_OFFSET, 1);
5961
5962 rc = hl_poll_timeout(
5963 hdev,
5964 stlb_base + STLB_SRAM_INIT_OFFSET,
5965 status,
5966 !status,
5967 1000,
5968 timeout_usec);
5969
5970 if (rc)
5971 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU SRAM init\n");
5972
5973 rc = gaudi2_mmu_update_hop0_addr(hdev, stlb_base, host_resident_pgt);
5974 if (rc)
5975 return rc;
5976
5977 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0);
5978
5979 rc = hl_poll_timeout(
5980 hdev,
5981 stlb_base + STLB_INV_ALL_START_OFFSET,
5982 status,
5983 !status,
5984 1000,
5985 timeout_usec);
5986
5987 if (rc)
5988 dev_notice_ratelimited(hdev->dev, "Timeout when waiting for MMU invalidate all\n");
5989
5990 WREG32(mmu_base + MMU_ENABLE_OFFSET, 1);
5991
5992 return rc;
5993 }
5994
5995 static int gaudi2_pci_mmu_init(struct hl_device *hdev)
5996 {
5997 struct asic_fixed_properties *prop = &hdev->asic_prop;
5998 struct gaudi2_device *gaudi2 = hdev->asic_specific;
5999 u32 mmu_base, stlb_base;
6000 int rc;
6001
6002 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU)
6003 return 0;
6004
6005 mmu_base = mmPMMU_HBW_MMU_BASE;
6006 stlb_base = mmPMMU_HBW_STLB_BASE;
6007
6008 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
6009 (0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
6010 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
6011 (4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
6012 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_SHIFT) |
6013 (5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_SHIFT),
6014 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
6015 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
6016 PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
6017 PMMU_HBW_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
6018 PMMU_HBW_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
6019
6020 WREG32(stlb_base + STLB_LL_LOOKUP_MASK_63_32_OFFSET, 0);
6021
6022 if (PAGE_SIZE == SZ_64K) {
6023 /* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
6024 RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
6025 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
6026 FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
6027 FIELD_PREP(
6028 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK,
6029 1),
6030 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK |
6031 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK |
6032 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK);
6033 }
6034
6035 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK);
6036
6037 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->pmmu.host_resident);
6038 if (rc)
6039 return rc;
6040
6041 gaudi2->hw_cap_initialized |= HW_CAP_PMMU;
6042
6043 return 0;
6044 }
6045
6046 static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
6047 int hmmu_id)
6048 {
6049 struct asic_fixed_properties *prop = &hdev->asic_prop;
6050 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6051 u32 offset, mmu_base, stlb_base, hw_cap;
6052 u8 dmmu_seq;
6053 int rc;
6054
6055 dmmu_seq = NUM_OF_HMMU_PER_DCORE * dcore_id + hmmu_id;
6056 hw_cap = HW_CAP_DCORE0_DMMU0 << dmmu_seq;
6057
6058 /*
6059 * return if DMMU is already initialized or if it's not out of
6060 * isolation (due to cluster binning)
6061 */
6062 if ((gaudi2->hw_cap_initialized & hw_cap) || !(prop->hmmu_hif_enabled_mask & BIT(dmmu_seq)))
6063 return 0;
6064
6065 offset = (u32) (dcore_id * DCORE_OFFSET + hmmu_id * DCORE_HMMU_OFFSET);
6066 mmu_base = mmDCORE0_HMMU0_MMU_BASE + offset;
6067 stlb_base = mmDCORE0_HMMU0_STLB_BASE + offset;
6068
6069 RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
6070 MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
6071
6072 RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
6073 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
6074 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
6075 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
6076 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK, 3) |
6077 FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK, 3),
6078 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK |
6079 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK |
6080 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK |
6081 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_LAST_HOP_MASK |
6082 DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FOLLOWER_HOP_MASK);
6083
6084 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1,
6085 STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK);
6086
6087 WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK);
6088
6089 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base, prop->dmmu.host_resident);
6090 if (rc)
6091 return rc;
6092
6093 gaudi2->hw_cap_initialized |= hw_cap;
6094
6095 return 0;
6096 }
6097
6098 static int gaudi2_hbm_mmu_init(struct hl_device *hdev)
6099 {
6100 int rc, dcore_id, hmmu_id;
6101
6102 for (dcore_id = 0 ; dcore_id < NUM_OF_DCORES ; dcore_id++)
6103 for (hmmu_id = 0 ; hmmu_id < NUM_OF_HMMU_PER_DCORE; hmmu_id++) {
6104 rc = gaudi2_dcore_hmmu_init(hdev, dcore_id, hmmu_id);
6105 if (rc)
6106 return rc;
6107 }
6108
6109 return 0;
6110 }
6111
6112 static int gaudi2_mmu_init(struct hl_device *hdev)
6113 {
6114 int rc;
6115
6116 rc = gaudi2_pci_mmu_init(hdev);
6117 if (rc)
6118 return rc;
6119
6120 rc = gaudi2_hbm_mmu_init(hdev);
6121 if (rc)
6122 return rc;
6123
6124 return 0;
6125 }
6126
6127 static int gaudi2_hw_init(struct hl_device *hdev)
6128 {
6129 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6130 int rc;
6131
6132 /* Let's mark in the H/W that we have reached this point. We check
6133 * this value in the reset_before_init function to understand whether
6134 * we need to reset the chip before doing H/W init. This register is
6135 * cleared by the H/W upon H/W reset
6136 */
6137 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
6138
6139 /* Perform read from the device to make sure device is up */
6140 RREG32(mmHW_STATE);
6141
6142 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE.
6143 * So we set it here and if anyone tries to move it later to
6144 * a different address, there will be an error
6145 */
6146 if (hdev->asic_prop.iatu_done_by_fw)
6147 gaudi2->dram_bar_cur_addr = DRAM_PHYS_BASE;
6148
6149 /*
6150 * Before pushing u-boot/linux to device, need to set the hbm bar to
6151 * base address of dram
6152 */
6153 if (gaudi2_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
6154 dev_err(hdev->dev, "failed to map HBM bar to DRAM base address\n");
6155 return -EIO;
6156 }
6157
6158 rc = gaudi2_init_cpu(hdev);
6159 if (rc) {
6160 dev_err(hdev->dev, "failed to initialize CPU\n");
6161 return rc;
6162 }
6163
6164 gaudi2_init_scrambler_hbm(hdev);
6165 gaudi2_init_kdma(hdev);
6166
6167 rc = gaudi2_init_cpu_queues(hdev, GAUDI2_CPU_TIMEOUT_USEC);
6168 if (rc) {
6169 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", rc);
6170 return rc;
6171 }
6172
6173 rc = gaudi2->cpucp_info_get(hdev);
6174 if (rc) {
6175 dev_err(hdev->dev, "Failed to get cpucp info\n");
6176 return rc;
6177 }
6178
6179 rc = gaudi2_mmu_init(hdev);
6180 if (rc)
6181 return rc;
6182
6183 gaudi2_init_pdma(hdev);
6184 gaudi2_init_edma(hdev);
6185 gaudi2_init_sm(hdev);
6186 gaudi2_init_tpc(hdev);
6187 gaudi2_init_mme(hdev);
6188 gaudi2_init_rotator(hdev);
6189 gaudi2_init_dec(hdev);
6190 gaudi2_enable_timestamp(hdev);
6191
6192 rc = gaudi2_coresight_init(hdev);
6193 if (rc)
6194 goto disable_queues;
6195
6196 rc = gaudi2_enable_msix(hdev);
6197 if (rc)
6198 goto disable_queues;
6199
6200 /* Perform read from the device to flush all configuration */
6201 RREG32(mmHW_STATE);
6202
6203 return 0;
6204
6205 disable_queues:
6206 gaudi2_disable_dma_qmans(hdev);
6207 gaudi2_disable_mme_qmans(hdev);
6208 gaudi2_disable_tpc_qmans(hdev);
6209 gaudi2_disable_rot_qmans(hdev);
6210 gaudi2_disable_nic_qmans(hdev);
6211
6212 gaudi2_disable_timestamp(hdev);
6213
6214 return rc;
6215 }
6216
6217 /**
6218 * gaudi2_send_hard_reset_cmd - common function to handle reset
6219 *
6220 * @hdev: pointer to the habanalabs device structure
6221 *
6222 * This function handles the various possible scenarios for reset.
6223  * It considers whether the reset is handled by the driver or by the FW, and which FW components are loaded
6224 */
6225 static void gaudi2_send_hard_reset_cmd(struct hl_device *hdev)
6226 {
6227 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6228 bool heartbeat_reset, preboot_only, cpu_initialized = false;
6229 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6230 u32 cpu_boot_status;
6231
6232 preboot_only = (hdev->fw_loader.fw_comp_loaded == FW_TYPE_PREBOOT_CPU);
6233 heartbeat_reset = (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT);
6234
6235 /*
6236 	 * Handle a corner case where the failure occurred while loading the cpu
6237 	 * management app, while the driver did not detect any failure during FW load.
6238 	 * In such a scenario the driver will send only HALT_MACHINE, and no one will
6239 	 * respond to this request since the FW is already back in preboot and cannot
6240 	 * handle such a command.
6241 	 * In this case, the next time the management app loads it will check the events
6242 	 * register, which will still hold the halt indication, and will reboot the device.
6243 	 * The solution is to let preboot clear all relevant registers before the next
6244 	 * boot, once the driver sends COMMS_RST_DEV.
6245 */
6246 cpu_boot_status = RREG32(mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS);
6247
6248 if (gaudi2 && (gaudi2->hw_cap_initialized & HW_CAP_CPU) &&
6249 (cpu_boot_status == CPU_BOOT_STATUS_SRAM_AVAIL))
6250 cpu_initialized = true;
6251
6252 /*
6253 	 * When Linux/Bootfit exists, this write to the SP can be interpreted in 2 ways:
6254 * 1. FW reset: FW initiate the reset sequence
6255 * 2. driver reset: FW will start HALT sequence (the preparations for the
6256 * reset but not the reset itself as it is not implemented
6257 * on their part) and LKD will wait to let FW complete the
6258 * sequence before issuing the reset
6259 */
6260 if (!preboot_only && cpu_initialized) {
6261 WREG32(le32_to_cpu(dyn_regs->gic_host_halt_irq),
6262 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_HALT_MACHINE].cpu_id);
6263
6264 msleep(GAUDI2_CPU_RESET_WAIT_MSEC);
6265 }
6266
6267 /*
6268 * When working with preboot (without Linux/Boot fit) we can
6269 * communicate only using the COMMS commands to issue halt/reset.
6270 *
6271 * For the case in which we are working with Linux/Bootfit this is a hail-mary
6272 * attempt to revive the card in the small chance that the f/w has
6273 * experienced a watchdog event, which caused it to return back to preboot.
6274 * In that case, triggering reset through GIC won't help. We need to
6275 * trigger the reset as if Linux wasn't loaded.
6276 *
6277 * We do it only if the reset cause was HB, because that would be the
6278 * indication of such an event.
6279 *
6280 * In case watchdog hasn't expired but we still got HB, then this won't
6281 * do any damage.
6282 */
6283
6284 if (heartbeat_reset || preboot_only || !cpu_initialized) {
6285 if (hdev->asic_prop.hard_reset_done_by_fw)
6286 hl_fw_ask_hard_reset_without_linux(hdev);
6287 else
6288 hl_fw_ask_halt_machine_without_linux(hdev);
6289 }
6290 }
6291
6292 /**
6293 * gaudi2_execute_hard_reset - execute hard reset by driver/FW
6294 *
6295 * @hdev: pointer to the habanalabs device structure
6296 *
6297  * This function executes a hard reset, based on whether the driver or the FW should perform it
6298 */
6299 static void gaudi2_execute_hard_reset(struct hl_device *hdev)
6300 {
6301 if (hdev->asic_prop.hard_reset_done_by_fw) {
6302 gaudi2_send_hard_reset_cmd(hdev);
6303 return;
6304 }
6305
6306 /* Set device to handle FLR by H/W as we will put the device
6307 * CPU to halt mode
6308 */
6309 WREG32(mmPCIE_AUX_FLR_CTRL,
6310 (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | PCIE_AUX_FLR_CTRL_INT_MASK_MASK));
6311
6312 gaudi2_send_hard_reset_cmd(hdev);
6313
6314 WREG32(mmPSOC_RESET_CONF_SW_ALL_RST, 1);
6315 }
6316
6317 /**
6318 * gaudi2_execute_soft_reset - execute soft reset by driver/FW
6319 *
6320 * @hdev: pointer to the habanalabs device structure
6321 * @driver_performs_reset: true if driver should perform reset instead of f/w.
6322 * @poll_timeout_us: time to wait for response from f/w.
6323 *
6324  * This function executes a soft reset, based on whether the driver or the FW should perform it
6325 */
6326 static int gaudi2_execute_soft_reset(struct hl_device *hdev, bool driver_performs_reset,
6327 u32 poll_timeout_us)
6328 {
6329 if (!driver_performs_reset)
6330 return hl_fw_send_soft_reset(hdev);
6331
6332 /* Block access to engines, QMANs and SM during reset, these
6333 * RRs will be reconfigured after soft reset.
6334 * PCIE_MSIX is left unsecured to allow NIC packets processing during the reset.
6335 */
6336 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 1,
6337 mmDCORE0_TPC0_QM_DCCM_BASE, mmPCIE_MSIX_BASE);
6338
6339 gaudi2_write_rr_to_all_lbw_rtrs(hdev, RR_TYPE_LONG, NUM_LONG_LBW_RR - 2,
6340 mmPCIE_MSIX_BASE + HL_BLOCK_SIZE,
6341 mmPCIE_VDEC1_MSTR_IF_RR_SHRD_HBW_BASE + HL_BLOCK_SIZE);
6342
6343 WREG32(mmPSOC_RESET_CONF_SOFT_RST, 1);
6344 return 0;
6345 }
6346
6347 static void gaudi2_poll_btm_indication(struct hl_device *hdev, u32 poll_timeout_us)
6348 {
6349 int i, rc = 0;
6350 u32 reg_val;
6351
6352 /* We poll the BTM done indication multiple times after reset due to
6353 * a HW errata 'GAUDI2_0300'
6354 */
6355 for (i = 0 ; i < GAUDI2_RESET_POLL_CNT ; i++)
6356 rc = hl_poll_timeout(
6357 hdev,
6358 mmPSOC_GLOBAL_CONF_BTM_FSM,
6359 reg_val,
6360 reg_val == 0,
6361 1000,
6362 poll_timeout_us);
6363
6364 if (rc)
6365 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", reg_val);
6366 }
6367
6368 static int gaudi2_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
6369 {
6370 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6371 u32 poll_timeout_us, reset_sleep_ms;
6372 bool driver_performs_reset = false;
6373 int rc;
6374
6375 if (hdev->pldm) {
6376 reset_sleep_ms = hard_reset ? GAUDI2_PLDM_HRESET_TIMEOUT_MSEC :
6377 GAUDI2_PLDM_SRESET_TIMEOUT_MSEC;
6378 poll_timeout_us = GAUDI2_PLDM_RESET_POLL_TIMEOUT_USEC;
6379 } else {
6380 reset_sleep_ms = GAUDI2_RESET_TIMEOUT_MSEC;
6381 poll_timeout_us = GAUDI2_RESET_POLL_TIMEOUT_USEC;
6382 }
6383
6384 if (fw_reset)
6385 goto skip_reset;
6386
6387 gaudi2_reset_arcs(hdev);
6388
6389 if (hard_reset) {
6390 driver_performs_reset = !hdev->asic_prop.hard_reset_done_by_fw;
6391 gaudi2_execute_hard_reset(hdev);
6392 } else {
6393 /*
6394 		 * As we also have to support working with preboot only (which does not support
6395 		 * soft reset), we have to make sure that security is disabled before letting the
6396 		 * driver do the reset. The user shall control the BFE flags to avoid requesting
6397 		 * a soft reset on a secured device that runs preboot only.
6398 */
6399 driver_performs_reset = (hdev->fw_components == FW_TYPE_PREBOOT_CPU &&
6400 !hdev->asic_prop.fw_security_enabled);
6401 rc = gaudi2_execute_soft_reset(hdev, driver_performs_reset, poll_timeout_us);
6402 if (rc)
6403 return rc;
6404 }
6405
6406 skip_reset:
6407 if (driver_performs_reset || hard_reset) {
6408 /*
6409 * Instead of waiting for BTM indication we should wait for preboot ready:
6410 * Consider the below scenario:
6411 * 1. FW update is being triggered
6412 * - setting the dirty bit
6413 * 2. hard reset will be triggered due to the dirty bit
6414 * 3. FW initiates the reset:
6415 * - dirty bit cleared
6416 * - BTM indication cleared
6417 * - preboot ready indication cleared
6418 * 4. during hard reset:
6419 * - BTM indication will be set
6420 * - BIST test performed and another reset triggered
6421 * 5. only after this reset the preboot will set the preboot ready
6422 *
6423 		 * When polling on the BTM indication alone, we can lose sync with the FW while
6424 		 * trying to communicate with a FW that is in the middle of a reset.
6425 		 * To overcome this, we always wait for the preboot ready indication.
6426 */
6427
6428 /* without this sleep reset will not work */
6429 msleep(reset_sleep_ms);
6430
6431 if (hdev->fw_components & FW_TYPE_PREBOOT_CPU)
6432 hl_fw_wait_preboot_ready(hdev);
6433 else
6434 gaudi2_poll_btm_indication(hdev, poll_timeout_us);
6435 }
6436
6437 if (!gaudi2)
6438 return 0;
6439
6440 gaudi2->dec_hw_cap_initialized &= ~(HW_CAP_DEC_MASK);
6441 gaudi2->tpc_hw_cap_initialized &= ~(HW_CAP_TPC_MASK);
6442
6443 /*
6444 * Clear NIC capability mask in order for driver to re-configure
6445 * NIC QMANs. NIC ports will not be re-configured during soft
6446 * reset as we call gaudi2_nic_init only during hard reset
6447 */
6448 gaudi2->nic_hw_cap_initialized &= ~(HW_CAP_NIC_MASK);
6449
6450 if (hard_reset) {
6451 gaudi2->hw_cap_initialized &=
6452 ~(HW_CAP_DRAM | HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_MASK |
6453 HW_CAP_PMMU | HW_CAP_CPU | HW_CAP_CPU_Q |
6454 HW_CAP_SRAM_SCRAMBLER | HW_CAP_DMMU_MASK |
6455 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_KDMA |
6456 HW_CAP_MME_MASK | HW_CAP_ROT_MASK);
6457
6458 memset(gaudi2->events_stat, 0, sizeof(gaudi2->events_stat));
6459 } else {
6460 gaudi2->hw_cap_initialized &=
6461 ~(HW_CAP_CLK_GATE | HW_CAP_HBM_SCRAMBLER_SW_RESET |
6462 HW_CAP_PDMA_MASK | HW_CAP_EDMA_MASK | HW_CAP_MME_MASK |
6463 HW_CAP_ROT_MASK);
6464 }
6465 return 0;
6466 }
6467
6468 static int gaudi2_suspend(struct hl_device *hdev)
6469 {
6470 return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);
6471 }
6472
6473 static int gaudi2_resume(struct hl_device *hdev)
6474 {
6475 return gaudi2_init_iatu(hdev);
6476 }
6477
6478 static int gaudi2_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
6479 void *cpu_addr, dma_addr_t dma_addr, size_t size)
6480 {
6481 int rc;
6482
6483 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
6484 VM_DONTCOPY | VM_NORESERVE);
6485
6486 #ifdef _HAS_DMA_MMAP_COHERENT
6487
6488 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size);
6489 if (rc)
6490 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc);
6491
6492 #else
6493
6494 rc = remap_pfn_range(vma, vma->vm_start,
6495 virt_to_phys(cpu_addr) >> PAGE_SHIFT,
6496 size, vma->vm_page_prot);
6497 if (rc)
6498 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
6499
6500 #endif
6501
6502 return rc;
6503 }
6504
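/*
 * Translate a h/w queue ID to the matching capability bit in order to check
 * whether the queue is enabled. Each QMAN exposes 4 PQs, hence the queue
 * index is divided by 4 to get the engine instance.
 */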
6505 static bool gaudi2_is_queue_enabled(struct hl_device *hdev, u32 hw_queue_id)
6506 {
6507 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6508 u64 hw_cap_mask = 0;
6509 u64 hw_tpc_cap_bit = 0;
6510 u64 hw_nic_cap_bit = 0;
6511 u64 hw_test_cap_bit = 0;
6512
6513 switch (hw_queue_id) {
6514 case GAUDI2_QUEUE_ID_PDMA_0_0:
6515 case GAUDI2_QUEUE_ID_PDMA_0_1:
6516 case GAUDI2_QUEUE_ID_PDMA_1_0:
6517 hw_cap_mask = HW_CAP_PDMA_MASK;
6518 break;
6519 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3:
6520 hw_test_cap_bit = HW_CAP_EDMA_SHIFT +
6521 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2);
6522 break;
6523 case GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3:
6524 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + NUM_OF_EDMA_PER_DCORE +
6525 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0) >> 2);
6526 break;
6527 case GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3:
6528 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 2 * NUM_OF_EDMA_PER_DCORE +
6529 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0) >> 2);
6530 break;
6531 case GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3:
6532 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 3 * NUM_OF_EDMA_PER_DCORE +
6533 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0) >> 2);
6534 break;
6535
6536 case GAUDI2_QUEUE_ID_DCORE0_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE0_MME_0_3:
6537 hw_test_cap_bit = HW_CAP_MME_SHIFT;
6538 break;
6539
6540 case GAUDI2_QUEUE_ID_DCORE1_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE1_MME_0_3:
6541 hw_test_cap_bit = HW_CAP_MME_SHIFT + 1;
6542 break;
6543
6544 case GAUDI2_QUEUE_ID_DCORE2_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE2_MME_0_3:
6545 hw_test_cap_bit = HW_CAP_MME_SHIFT + 2;
6546 break;
6547
6548 case GAUDI2_QUEUE_ID_DCORE3_MME_0_0 ... GAUDI2_QUEUE_ID_DCORE3_MME_0_3:
6549 hw_test_cap_bit = HW_CAP_MME_SHIFT + 3;
6550 break;
6551
6552 case GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_5_3:
6553 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT +
6554 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0) >> 2);
6555
6556 /* special case where cap bit refers to the first queue id */
6557 if (!hw_tpc_cap_bit)
6558 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(0));
6559 break;
6560
6561 case GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE1_TPC_5_3:
6562 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + NUM_OF_TPC_PER_DCORE +
6563 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0) >> 2);
6564 break;
6565
6566 case GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE2_TPC_5_3:
6567 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (2 * NUM_OF_TPC_PER_DCORE) +
6568 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0) >> 2);
6569 break;
6570
6571 case GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 ... GAUDI2_QUEUE_ID_DCORE3_TPC_5_3:
6572 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (3 * NUM_OF_TPC_PER_DCORE) +
6573 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0) >> 2);
6574 break;
6575
6576 case GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 ... GAUDI2_QUEUE_ID_DCORE0_TPC_6_3:
6577 hw_tpc_cap_bit = HW_CAP_TPC_SHIFT + (4 * NUM_OF_TPC_PER_DCORE);
6578 break;
6579
6580 case GAUDI2_QUEUE_ID_ROT_0_0 ... GAUDI2_QUEUE_ID_ROT_1_3:
6581 hw_test_cap_bit = HW_CAP_ROT_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_ROT_0_0) >> 2);
6582 break;
6583
6584 case GAUDI2_QUEUE_ID_NIC_0_0 ... GAUDI2_QUEUE_ID_NIC_23_3:
6585 hw_nic_cap_bit = HW_CAP_NIC_SHIFT + ((hw_queue_id - GAUDI2_QUEUE_ID_NIC_0_0) >> 2);
6586
6587 /* special case where cap bit refers to the first queue id */
6588 if (!hw_nic_cap_bit)
6589 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(0));
6590 break;
6591
6592 case GAUDI2_QUEUE_ID_CPU_PQ:
6593 return !!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q);
6594
6595 default:
6596 return false;
6597 }
6598
6599 if (hw_tpc_cap_bit)
6600 return !!(gaudi2->tpc_hw_cap_initialized & BIT_ULL(hw_tpc_cap_bit));
6601
6602 if (hw_nic_cap_bit)
6603 return !!(gaudi2->nic_hw_cap_initialized & BIT_ULL(hw_nic_cap_bit));
6604
6605 if (hw_test_cap_bit)
6606 hw_cap_mask = BIT_ULL(hw_test_cap_bit);
6607
6608 return !!(gaudi2->hw_cap_initialized & hw_cap_mask);
6609 }
6610
6611 static bool gaudi2_is_arc_enabled(struct hl_device *hdev, u64 arc_id)
6612 {
6613 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6614
6615 switch (arc_id) {
6616 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6617 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6618 return !!(gaudi2->active_hw_arc & BIT_ULL(arc_id));
6619
6620 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6621 return !!(gaudi2->active_tpc_arc & BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6622
6623 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6624 return !!(gaudi2->active_nic_arc & BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6625
6626 default:
6627 return false;
6628 }
6629 }
6630
6631 static void gaudi2_clr_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6632 {
6633 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6634
6635 switch (arc_id) {
6636 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6637 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6638 gaudi2->active_hw_arc &= ~(BIT_ULL(arc_id));
6639 break;
6640
6641 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6642 gaudi2->active_tpc_arc &= ~(BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0));
6643 break;
6644
6645 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6646 gaudi2->active_nic_arc &= ~(BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0));
6647 break;
6648
6649 default:
6650 return;
6651 }
6652 }
6653
6654 static void gaudi2_set_arc_id_cap(struct hl_device *hdev, u64 arc_id)
6655 {
6656 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6657
6658 switch (arc_id) {
6659 case CPU_ID_SCHED_ARC0 ... CPU_ID_SCHED_ARC5:
6660 case CPU_ID_MME_QMAN_ARC0...CPU_ID_ROT_QMAN_ARC1:
6661 gaudi2->active_hw_arc |= BIT_ULL(arc_id);
6662 break;
6663
6664 case CPU_ID_TPC_QMAN_ARC0...CPU_ID_TPC_QMAN_ARC24:
6665 gaudi2->active_tpc_arc |= BIT_ULL(arc_id - CPU_ID_TPC_QMAN_ARC0);
6666 break;
6667
6668 case CPU_ID_NIC_QMAN_ARC0...CPU_ID_NIC_QMAN_ARC23:
6669 gaudi2->active_nic_arc |= BIT_ULL(arc_id - CPU_ID_NIC_QMAN_ARC0);
6670 break;
6671
6672 default:
6673 return;
6674 }
6675 }
6676
6677 static void gaudi2_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
6678 {
6679 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
6680 u32 pq_offset, reg_base, db_reg_offset, db_value;
6681
6682 if (hw_queue_id != GAUDI2_QUEUE_ID_CPU_PQ) {
6683 /*
6684 * QMAN has 4 successive PQ_PI registers, 1 for each of the QMAN PQs.
6685 * Masking the H/W queue ID with 0x3 extracts the QMAN internal PQ
6686 * number.
6687 */
6688 pq_offset = (hw_queue_id & 0x3) * 4;
6689 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6690 db_reg_offset = reg_base + QM_PQ_PI_0_OFFSET + pq_offset;
6691 } else {
6692 db_reg_offset = mmCPU_IF_PF_PQ_PI;
6693 }
6694
6695 db_value = pi;
6696
6697 /* ring the doorbell */
6698 WREG32(db_reg_offset, db_value);
6699
6700 if (hw_queue_id == GAUDI2_QUEUE_ID_CPU_PQ) {
6701 /* make sure device CPU will read latest data from host */
6702 mb();
6703 WREG32(le32_to_cpu(dyn_regs->gic_host_pi_upd_irq),
6704 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_PI_UPDATE].cpu_id);
6705 }
6706 }
6707
6708 static void gaudi2_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
6709 {
6710 __le64 *pbd = (__le64 *) bd;
6711
6712 	/* The QMAN PQs are in host memory, so a simple copy suffices */
6713 pqe[0] = pbd[0];
6714 pqe[1] = pbd[1];
6715 }
6716
6717 static void *gaudi2_dma_alloc_coherent(struct hl_device *hdev, size_t size,
6718 dma_addr_t *dma_handle, gfp_t flags)
6719 {
6720 return dma_alloc_coherent(&hdev->pdev->dev, size, dma_handle, flags);
6721 }
6722
6723 static void gaudi2_dma_free_coherent(struct hl_device *hdev, size_t size,
6724 void *cpu_addr, dma_addr_t dma_handle)
6725 {
6726 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, dma_handle);
6727 }
6728
6729 static int gaudi2_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
6730 u32 timeout, u64 *result)
6731 {
6732 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6733
6734 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)) {
6735 if (result)
6736 *result = 0;
6737 return 0;
6738 }
6739
6740 if (!timeout)
6741 timeout = GAUDI2_MSG_TO_CPU_TIMEOUT_USEC;
6742
6743 return hl_fw_send_cpu_message(hdev, GAUDI2_QUEUE_ID_CPU_PQ, msg, len, timeout, result);
6744 }
6745
6746 static void *gaudi2_dma_pool_zalloc(struct hl_device *hdev, size_t size,
6747 gfp_t mem_flags, dma_addr_t *dma_handle)
6748 {
6749 if (size > GAUDI2_DMA_POOL_BLK_SIZE)
6750 return NULL;
6751
6752 return dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
6753 }
6754
6755 static void gaudi2_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
6756 {
6757 dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
6758 }
6759
6760 static void *gaudi2_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
6761 dma_addr_t *dma_handle)
6762 {
6763 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
6764 }
6765
6766 static void gaudi2_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
6767 {
6768 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
6769 }
6770
6771 static int gaudi2_validate_cb_address(struct hl_device *hdev, struct hl_cs_parser *parser)
6772 {
6773 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
6774 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6775
6776 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) {
6777 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id);
6778 return -EINVAL;
6779 }
6780
6781 /* Just check if CB address is valid */
6782
6783 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6784 parser->user_cb_size,
6785 asic_prop->sram_user_base_address,
6786 asic_prop->sram_end_address))
6787 return 0;
6788
6789 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6790 parser->user_cb_size,
6791 asic_prop->dram_user_base_address,
6792 asic_prop->dram_end_address))
6793 return 0;
6794
6795 if ((gaudi2->hw_cap_initialized & HW_CAP_DMMU_MASK) &&
6796 hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6797 parser->user_cb_size,
6798 asic_prop->dmmu.start_addr,
6799 asic_prop->dmmu.end_addr))
6800 return 0;
6801
6802 if (gaudi2->hw_cap_initialized & HW_CAP_PMMU) {
6803 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
6804 parser->user_cb_size,
6805 asic_prop->pmmu.start_addr,
6806 asic_prop->pmmu.end_addr) ||
6807 hl_mem_area_inside_range(
6808 (u64) (uintptr_t) parser->user_cb,
6809 parser->user_cb_size,
6810 asic_prop->pmmu_huge.start_addr,
6811 asic_prop->pmmu_huge.end_addr))
6812 return 0;
6813
6814 } else if (gaudi2_host_phys_addr_valid((u64) (uintptr_t) parser->user_cb)) {
6815 if (!hdev->pdev)
6816 return 0;
6817
6818 if (!device_iommu_mapped(&hdev->pdev->dev))
6819 return 0;
6820 }
6821
6822 dev_err(hdev->dev, "CB address %p + 0x%x for internal QMAN is not valid\n",
6823 parser->user_cb, parser->user_cb_size);
6824
6825 return -EFAULT;
6826 }
6827
6828 static int gaudi2_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
6829 {
6830 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6831
6832 if (!parser->is_kernel_allocated_cb)
6833 return gaudi2_validate_cb_address(hdev, parser);
6834
6835 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) {
6836 dev_err(hdev->dev, "PMMU not initialized - Unsupported mode in Gaudi2\n");
6837 return -EINVAL;
6838 }
6839
6840 return 0;
6841 }
6842
6843 static int gaudi2_send_heartbeat(struct hl_device *hdev)
6844 {
6845 struct gaudi2_device *gaudi2 = hdev->asic_specific;
6846
6847 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
6848 return 0;
6849
6850 return hl_fw_send_heartbeat(hdev);
6851 }
6852
6853 /* This is an internal helper function, used to update the KDMA mmu props.
6854 * Should be called with a proper kdma lock.
6855 */
6856 static void gaudi2_kdma_set_mmbp_asid(struct hl_device *hdev,
6857 bool mmu_bypass, u32 asid)
6858 {
6859 u32 rw_asid, rw_mmu_bp;
6860
6861 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
6862 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
6863
6864 rw_mmu_bp = (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_SHIFT) |
6865 (!!mmu_bypass << ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_SHIFT);
6866
6867 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_ASID, rw_asid);
6868 WREG32(mmARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP, rw_mmu_bp);
6869 }
6870
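/*
 * Reset the given sync object and arm a sync manager monitor on it, so that
 * once the SOB value equals sync_value the monitor writes mon_payload
 * towards the completion queue selected by cq_id.
 */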
6871 static void gaudi2_arm_cq_monitor(struct hl_device *hdev, u32 sob_id, u32 mon_id, u32 cq_id,
6872 u32 mon_payload, u32 sync_value)
6873 {
6874 u32 sob_offset, mon_offset, sync_group_id, mode, mon_arm;
6875 u8 mask;
6876
6877 sob_offset = sob_id * 4;
6878 mon_offset = mon_id * 4;
6879
6880 /* Reset the SOB value */
6881 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset, 0);
6882
6883 	/* Since CQ_EN is set, this register holds the index of the target CQ */
6884 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_offset, cq_id);
6885
6886 /* Configure this address with CS index because CQ_EN is set */
6887 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_offset, mon_payload);
6888
6889 sync_group_id = sob_id / 8;
6890 mask = ~(1 << (sob_id & 0x7));
6891 mode = 1; /* comparison mode is "equal to" */
6892
6893 mon_arm = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOD_MASK, sync_value);
6894 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SOP_MASK, mode);
6895 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_MASK_MASK, mask);
6896 mon_arm |= FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_ARM_SID_MASK, sync_group_id);
6897 WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + mon_offset, mon_arm);
6898 }
6899
6900 /* Submit a linear copy (or memset) job to the KDMA engine and wait for its completion */
6901 static int gaudi2_send_job_to_kdma(struct hl_device *hdev,
6902 u64 src_addr, u64 dst_addr,
6903 u32 size, bool is_memset)
6904 {
6905 u32 comp_val, commit_mask, *polling_addr, timeout, status = 0;
6906 struct hl_cq_entry *cq_base;
6907 struct hl_cq *cq;
6908 u64 comp_addr;
6909 int rc;
6910
6911 gaudi2_arm_cq_monitor(hdev, GAUDI2_RESERVED_SOB_KDMA_COMPLETION,
6912 GAUDI2_RESERVED_MON_KDMA_COMPLETION,
6913 GAUDI2_RESERVED_CQ_KDMA_COMPLETION, 1, 1);
6914
6915 comp_addr = CFG_BASE + mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 +
6916 (GAUDI2_RESERVED_SOB_KDMA_COMPLETION * sizeof(u32));
6917
6918 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
6919 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
6920
6921 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_LO, lower_32_bits(src_addr));
6922 WREG32(mmARC_FARM_KDMA_CTX_SRC_BASE_HI, upper_32_bits(src_addr));
6923 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_LO, lower_32_bits(dst_addr));
6924 WREG32(mmARC_FARM_KDMA_CTX_DST_BASE_HI, upper_32_bits(dst_addr));
6925 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_LO, lower_32_bits(comp_addr));
6926 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_ADDR_HI, upper_32_bits(comp_addr));
6927 WREG32(mmARC_FARM_KDMA_CTX_WR_COMP_WDATA, comp_val);
6928 WREG32(mmARC_FARM_KDMA_CTX_DST_TSIZE_0, size);
6929
6930 commit_mask = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
6931 FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
6932
6933 if (is_memset)
6934 commit_mask |= FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1);
6935
6936 WREG32(mmARC_FARM_KDMA_CTX_COMMIT, commit_mask);
6937
6938 /* Wait for completion */
6939 cq = &hdev->completion_queue[GAUDI2_RESERVED_CQ_KDMA_COMPLETION];
6940 cq_base = cq->kernel_address;
6941 polling_addr = (u32 *)&cq_base[cq->ci];
6942
6943 if (hdev->pldm)
6944 		/* 20 seconds of timeout for each 1MB of transfer */
6945 timeout = ((size / SZ_1M) + 1) * USEC_PER_SEC * 20;
6946 else
6947 timeout = KDMA_TIMEOUT_USEC;
6948
6949 /* Polling */
6950 rc = hl_poll_timeout_memory(
6951 hdev,
6952 polling_addr,
6953 status,
6954 (status == 1),
6955 1000,
6956 timeout,
6957 true);
6958
6959 *polling_addr = 0;
6960
6961 if (rc) {
6962 dev_err(hdev->dev, "Timeout while waiting for KDMA to be idle\n");
6963 WREG32(mmARC_FARM_KDMA_CFG_1, 1 << ARC_FARM_KDMA_CFG_1_HALT_SHIFT);
6964 return rc;
6965 }
6966
6967 cq->ci = hl_cq_inc_ptr(cq->ci);
6968
6969 return 0;
6970 }
6971
6972 static void gaudi2_memset_device_lbw(struct hl_device *hdev, u32 addr, u32 size, u32 val)
6973 {
6974 u32 i;
6975
6976 for (i = 0 ; i < size ; i += sizeof(u32))
6977 WREG32(addr + i, val);
6978 }
6979
6980 static void gaudi2_qman_set_test_mode(struct hl_device *hdev, u32 hw_queue_id, bool enable)
6981 {
6982 u32 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
6983
6984 if (enable) {
6985 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED_TEST_MODE);
6986 WREG32(reg_base + QM_PQC_CFG_OFFSET, 0);
6987 } else {
6988 WREG32(reg_base + QM_GLBL_PROT_OFFSET, QMAN_MAKE_TRUSTED);
6989 WREG32(reg_base + QM_PQC_CFG_OFFSET, 1 << PDMA0_QM_PQC_CFG_EN_SHIFT);
6990 }
6991 }
6992
6993 static inline u32 gaudi2_test_queue_hw_queue_id_to_sob_id(struct hl_device *hdev, u32 hw_queue_id)
6994 {
6995 return hdev->asic_prop.first_available_user_sob[0] +
6996 hw_queue_id - GAUDI2_QUEUE_ID_PDMA_0_0;
6997 }
6998
6999 static void gaudi2_test_queue_clear(struct hl_device *hdev, u32 hw_queue_id)
7000 {
7001 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7002 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
7003
7004 /* Reset the SOB value */
7005 WREG32(sob_addr, 0);
7006 }
7007
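/*
 * Build a MSG_SHORT packet that writes sob_val to the queue's reserved sync
 * object and submit it to the tested queue, so the caller can later poll
 * the SOB for the expected value.
 */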
7008 static int gaudi2_test_queue_send_msg_short(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val,
7009 struct gaudi2_queues_test_info *msg_info)
7010 {
7011 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7012 u32 tmp, sob_base = 1;
7013 struct packet_msg_short *msg_short_pkt = msg_info->kern_addr;
7014 size_t pkt_size = sizeof(struct packet_msg_short);
7015 int rc;
7016
7017 tmp = (PACKET_MSG_SHORT << GAUDI2_PKT_CTL_OPCODE_SHIFT) |
7018 (1 << GAUDI2_PKT_CTL_EB_SHIFT) |
7019 (1 << GAUDI2_PKT_CTL_MB_SHIFT) |
7020 (sob_base << GAUDI2_PKT_SHORT_CTL_BASE_SHIFT) |
7021 (sob_offset << GAUDI2_PKT_SHORT_CTL_ADDR_SHIFT);
7022
7023 msg_short_pkt->value = cpu_to_le32(sob_val);
7024 msg_short_pkt->ctl = cpu_to_le32(tmp);
7025
7026 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr);
7027 if (rc)
7028 dev_err(hdev->dev,
7029 "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id);
7030
7031 return rc;
7032 }
7033
7034 static int gaudi2_test_queue_wait_completion(struct hl_device *hdev, u32 hw_queue_id, u32 sob_val)
7035 {
7036 u32 sob_offset = gaudi2_test_queue_hw_queue_id_to_sob_id(hdev, hw_queue_id) * 4;
7037 u32 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
7038 u32 timeout_usec, tmp;
7039 int rc;
7040
7041 if (hdev->pldm)
7042 timeout_usec = GAUDI2_PLDM_TEST_QUEUE_WAIT_USEC;
7043 else
7044 timeout_usec = GAUDI2_TEST_QUEUE_WAIT_USEC;
7045
7046 rc = hl_poll_timeout(
7047 hdev,
7048 sob_addr,
7049 tmp,
7050 (tmp == sob_val),
7051 1000,
7052 timeout_usec);
7053
7054 if (rc == -ETIMEDOUT) {
7055 dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n",
7056 hw_queue_id, tmp);
7057 rc = -EIO;
7058 }
7059
7060 return rc;
7061 }
7062
7063 static int gaudi2_test_cpu_queue(struct hl_device *hdev)
7064 {
7065 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7066
7067 /*
7068 	 * Check the capability here, as send_cpu_message() won't update the
7069 	 * result value if the capability is not set
7070 */
7071 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7072 return 0;
7073
7074 return hl_fw_test_cpu_queue(hdev);
7075 }
7076
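/*
 * H/W queues sanity test: send a MSG_SHORT packet on every enabled queue
 * (except EDMA queues), test the CPU queue, and then verify that each sync
 * object was updated to the expected value.
 */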
7077 static int gaudi2_test_queues(struct hl_device *hdev)
7078 {
7079 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7080 struct gaudi2_queues_test_info *msg_info;
7081 u32 sob_val = 0x5a5a;
7082 int i, rc;
7083
7084 /* send test message on all enabled Qs */
7085 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7086 if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7087 continue;
7088
7089 msg_info = &gaudi2->queues_test_info[i - GAUDI2_QUEUE_ID_PDMA_0_0];
7090 gaudi2_qman_set_test_mode(hdev, i, true);
7091 gaudi2_test_queue_clear(hdev, i);
7092 rc = gaudi2_test_queue_send_msg_short(hdev, i, sob_val, msg_info);
7093 if (rc)
7094 goto done;
7095 }
7096
7097 rc = gaudi2_test_cpu_queue(hdev);
7098 if (rc)
7099 goto done;
7100
7101 /* verify that all messages were processed */
7102 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ; i++) {
7103 if (!gaudi2_is_queue_enabled(hdev, i) || gaudi2_is_edma_queue_id(i))
7104 continue;
7105
7106 rc = gaudi2_test_queue_wait_completion(hdev, i, sob_val);
7107 if (rc)
7108 /* chip is not usable, no need for cleanups, just bail-out with error */
7109 goto done;
7110
7111 gaudi2_test_queue_clear(hdev, i);
7112 gaudi2_qman_set_test_mode(hdev, i, false);
7113 }
7114
7115 done:
7116 return rc;
7117 }
7118
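/*
 * Late initialization after a compute reset: re-initialize the ARCs, scrub
 * their DCCM, re-apply the security configuration and unmask all IRQs that
 * might have been received during the reset.
 */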
7119 static int gaudi2_compute_reset_late_init(struct hl_device *hdev)
7120 {
7121 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7122 size_t irq_arr_size;
7123 int rc;
7124
7125 gaudi2_init_arcs(hdev);
7126
7127 rc = gaudi2_scrub_arcs_dccm(hdev);
7128 if (rc) {
7129 dev_err(hdev->dev, "Failed to scrub arcs DCCM\n");
7130 return rc;
7131 }
7132
7133 gaudi2_init_security(hdev);
7134
7135 /* Unmask all IRQs since some could have been received during the soft reset */
7136 irq_arr_size = gaudi2->num_of_valid_hw_events * sizeof(gaudi2->hw_events[0]);
7137 return hl_fw_unmask_irq_arr(hdev, gaudi2->hw_events, irq_arr_size);
7138 }
7139
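/*
 * An EDMA engine is considered idle only if its QMAN is idle, its DMA core is
 * not busy and the core is not halted. Non-idle engines are marked in the
 * caller's mask and optionally reported via the engines_data buffer.
 */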
7140 static bool gaudi2_get_edma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7141 struct engines_data *e)
7142 {
7143 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7144 struct asic_fixed_properties *prop = &hdev->asic_prop;
7145 unsigned long *mask = (unsigned long *) mask_arr;
7146 const char *edma_fmt = "%-6d%-6d%-9s%#-14x%#-15x%#x\n";
7147 bool is_idle = true, is_eng_idle;
7148 int engine_idx, i, j;
7149 u64 offset;
7150
7151 if (e)
7152 hl_engine_data_sprintf(e,
7153 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7154 "---- ---- ------- ------------ ------------- -------------\n");
7155
7156 for (i = 0; i < NUM_OF_DCORES; i++) {
7157 for (j = 0 ; j < NUM_OF_EDMA_PER_DCORE ; j++) {
7158 int seq = i * NUM_OF_EDMA_PER_DCORE + j;
7159
7160 if (!(prop->edma_enabled_mask & BIT(seq)))
7161 continue;
7162
7163 engine_idx = GAUDI2_DCORE0_ENGINE_ID_EDMA_0 +
7164 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7165 offset = i * DCORE_OFFSET + j * DCORE_EDMA_OFFSET;
7166
7167 dma_core_sts0 = RREG32(mmDCORE0_EDMA0_CORE_STS0 + offset);
7168 dma_core_sts1 = RREG32(mmDCORE0_EDMA0_CORE_STS1 + offset);
7169
7170 qm_glbl_sts0 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS0 + offset);
7171 qm_glbl_sts1 = RREG32(mmDCORE0_EDMA0_QM_GLBL_STS1 + offset);
7172 qm_cgm_sts = RREG32(mmDCORE0_EDMA0_QM_CGM_STS + offset);
7173
7174 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7175 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7176 is_idle &= is_eng_idle;
7177
7178 if (mask && !is_eng_idle)
7179 set_bit(engine_idx, mask);
7180
7181 if (e)
7182 hl_engine_data_sprintf(e, edma_fmt, i, j, is_eng_idle ? "Y" : "N",
7183 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7184 }
7185 }
7186
7187 return is_idle;
7188 }
7189
7190 static bool gaudi2_get_pdma_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7191 struct engines_data *e)
7192 {
7193 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_sts0, dma_core_sts1;
7194 unsigned long *mask = (unsigned long *) mask_arr;
7195 const char *pdma_fmt = "%-6d%-9s%#-14x%#-15x%#x\n";
7196 bool is_idle = true, is_eng_idle;
7197 int engine_idx, i;
7198 u64 offset;
7199
7200 if (e)
7201 hl_engine_data_sprintf(e,
7202 "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0 DMA_CORE_STS1\n"
7203 "---- ------- ------------ ------------- -------------\n");
7204
7205 for (i = 0 ; i < NUM_OF_PDMA ; i++) {
7206 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i;
7207 offset = i * PDMA_OFFSET;
7208 dma_core_sts0 = RREG32(mmPDMA0_CORE_STS0 + offset);
7209 dma_core_sts1 = RREG32(mmPDMA0_CORE_STS1 + offset);
7210
7211 qm_glbl_sts0 = RREG32(mmPDMA0_QM_GLBL_STS0 + offset);
7212 qm_glbl_sts1 = RREG32(mmPDMA0_QM_GLBL_STS1 + offset);
7213 qm_cgm_sts = RREG32(mmPDMA0_QM_CGM_STS + offset);
7214
7215 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7216 IS_DMA_IDLE(dma_core_sts0) && !IS_DMA_HALTED(dma_core_sts1);
7217 is_idle &= is_eng_idle;
7218
7219 if (mask && !is_eng_idle)
7220 set_bit(engine_idx, mask);
7221
7222 if (e)
7223 hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N",
7224 qm_glbl_sts0, dma_core_sts0, dma_core_sts1);
7225 }
7226
7227 return is_idle;
7228 }
7229
7230 static bool gaudi2_get_nic_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7231 struct engines_data *e)
7232 {
7233 unsigned long *mask = (unsigned long *) mask_arr;
7234 const char *nic_fmt = "%-5d%-9s%#-14x%#-12x\n";
7235 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7236 bool is_idle = true, is_eng_idle;
7237 int engine_idx, i;
7238 u64 offset = 0;
7239
7240 /* NIC, twelve macros in Full chip */
7241 if (e && hdev->nic_ports_mask)
7242 hl_engine_data_sprintf(e,
7243 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n"
7244 "--- ------- ------------ ----------\n");
7245
7246 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
7247 if (!(i & 1))
7248 offset = i / 2 * NIC_OFFSET;
7249 else
7250 offset += NIC_QM_OFFSET;
7251
7252 if (!(hdev->nic_ports_mask & BIT(i)))
7253 continue;
7254
7255 engine_idx = GAUDI2_ENGINE_ID_NIC0_0 + i;
7256
7257
7258 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset);
7259 qm_glbl_sts1 = RREG32(mmNIC0_QM0_GLBL_STS1 + offset);
7260 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset);
7261
7262 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7263 is_idle &= is_eng_idle;
7264
7265 if (mask && !is_eng_idle)
7266 set_bit(engine_idx, mask);
7267
7268 if (e)
7269 hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? "Y" : "N",
7270 qm_glbl_sts0, qm_cgm_sts);
7271 }
7272
7273 return is_idle;
7274 }
7275
7276 static bool gaudi2_get_mme_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7277 struct engines_data *e)
7278 {
7279 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, mme_arch_sts;
7280 unsigned long *mask = (unsigned long *) mask_arr;
7281 const char *mme_fmt = "%-5d%-6s%-9s%#-14x%#x\n";
7282 bool is_idle = true, is_eng_idle;
7283 int engine_idx, i;
7284 u64 offset;
7285
7286 if (e)
7287 hl_engine_data_sprintf(e,
7288 "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n"
7289 "--- ---- ------- ------------ ---------------\n");
7290 /* MME, one per Dcore */
7291 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7292 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET;
7293 offset = i * DCORE_OFFSET;
7294
7295 qm_glbl_sts0 = RREG32(mmDCORE0_MME_QM_GLBL_STS0 + offset);
7296 qm_glbl_sts1 = RREG32(mmDCORE0_MME_QM_GLBL_STS1 + offset);
7297 qm_cgm_sts = RREG32(mmDCORE0_MME_QM_CGM_STS + offset);
7298
7299 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7300 is_idle &= is_eng_idle;
7301
7302 mme_arch_sts = RREG32(mmDCORE0_MME_CTRL_LO_ARCH_STATUS + offset);
7303 is_eng_idle &= IS_MME_IDLE(mme_arch_sts);
7304 is_idle &= is_eng_idle;
7305
7306 if (e)
7307 hl_engine_data_sprintf(e, mme_fmt, i, "N",
7308 is_eng_idle ? "Y" : "N",
7309 qm_glbl_sts0,
7310 mme_arch_sts);
7311
7312 if (mask && !is_eng_idle)
7313 set_bit(engine_idx, mask);
7314 }
7315
7316 return is_idle;
7317 }
7318
7319 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset,
7320 struct iterate_module_ctx *ctx)
7321 {
7322 struct gaudi2_tpc_idle_data *idle_data = ctx->data;
7323 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7324 bool is_eng_idle;
7325 int engine_idx;
7326
7327 if ((dcore == 0) && (inst == (NUM_DCORE0_TPC - 1)))
7328 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
7329 else
7330 engine_idx = GAUDI2_DCORE0_ENGINE_ID_TPC_0 +
7331 dcore * GAUDI2_ENGINE_ID_DCORE_OFFSET + inst;
7332
7333 tpc_cfg_sts = RREG32(mmDCORE0_TPC0_CFG_STATUS + offset);
7334 qm_glbl_sts0 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS0 + offset);
7335 qm_glbl_sts1 = RREG32(mmDCORE0_TPC0_QM_GLBL_STS1 + offset);
7336 qm_cgm_sts = RREG32(mmDCORE0_TPC0_QM_CGM_STS + offset);
7337
7338 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts) &&
7339 IS_TPC_IDLE(tpc_cfg_sts);
7340 *(idle_data->is_idle) &= is_eng_idle;
7341
7342 if (idle_data->mask && !is_eng_idle)
7343 set_bit(engine_idx, idle_data->mask);
7344
7345 if (idle_data->e)
7346 hl_engine_data_sprintf(idle_data->e,
7347 idle_data->tpc_fmt, dcore, inst,
7348 is_eng_idle ? "Y" : "N",
7349 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
7350 }
7351
7352 static bool gaudi2_get_tpc_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7353 struct engines_data *e)
7354 {
7355 struct asic_fixed_properties *prop = &hdev->asic_prop;
7356 unsigned long *mask = (unsigned long *) mask_arr;
7357 bool is_idle = true;
7358
7359 struct gaudi2_tpc_idle_data tpc_idle_data = {
7360 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n",
7361 .e = e,
7362 .mask = mask,
7363 .is_idle = &is_idle,
7364 };
7365 struct iterate_module_ctx tpc_iter = {
7366 .fn = &gaudi2_is_tpc_engine_idle,
7367 .data = &tpc_idle_data,
7368 };
7369
7370 if (e && prop->tpc_enabled_mask)
7371 hl_engine_data_sprintf(e,
7372 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS STATUS\n"
7373 "---- --- ------- ------------ ---------- ------\n");
7374
7375 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7376
7377 return *tpc_idle_data.is_idle;
7378 }
7379
7380 static bool gaudi2_get_decoder_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7381 struct engines_data *e)
7382 {
7383 struct asic_fixed_properties *prop = &hdev->asic_prop;
7384 unsigned long *mask = (unsigned long *) mask_arr;
7385 const char *pcie_dec_fmt = "%-10d%-9s%#x\n";
7386 const char *dec_fmt = "%-6d%-5d%-9s%#x\n";
7387 bool is_idle = true, is_eng_idle;
7388 u32 dec_swreg15, dec_enabled_bit;
7389 int engine_idx, i, j;
7390 u64 offset;
7391
7392 	/* Decoders, two per Dcore and two shared PCIe decoders */
7393 if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK)))
7394 hl_engine_data_sprintf(e,
7395 "\nCORE DEC is_idle VSI_CMD_SWREG15\n"
7396 "---- --- ------- ---------------\n");
7397
7398 for (i = 0 ; i < NUM_OF_DCORES ; i++) {
7399 for (j = 0 ; j < NUM_OF_DEC_PER_DCORE ; j++) {
7400 dec_enabled_bit = 1 << (i * NUM_OF_DEC_PER_DCORE + j);
7401 if (!(prop->decoder_enabled_mask & dec_enabled_bit))
7402 continue;
7403
7404 engine_idx = GAUDI2_DCORE0_ENGINE_ID_DEC_0 +
7405 i * GAUDI2_ENGINE_ID_DCORE_OFFSET + j;
7406 offset = i * DCORE_OFFSET + j * DCORE_DEC_OFFSET;
7407
7408 dec_swreg15 = RREG32(mmDCORE0_DEC0_CMD_SWREG15 + offset);
7409 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7410 is_idle &= is_eng_idle;
7411
7412 if (mask && !is_eng_idle)
7413 set_bit(engine_idx, mask);
7414
7415 if (e)
7416 hl_engine_data_sprintf(e, dec_fmt, i, j,
7417 is_eng_idle ? "Y" : "N", dec_swreg15);
7418 }
7419 }
7420
7421 if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK))
7422 hl_engine_data_sprintf(e,
7423 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n"
7424 "-------- ------- ---------------\n");
7425
7426 	/* Check shared (PCIe) decoders */
7427 for (i = 0 ; i < NUM_OF_DEC_PER_DCORE ; i++) {
7428 dec_enabled_bit = PCIE_DEC_SHIFT + i;
7429 if (!(prop->decoder_enabled_mask & BIT(dec_enabled_bit)))
7430 continue;
7431
7432 engine_idx = GAUDI2_PCIE_ENGINE_ID_DEC_0 + i;
7433 offset = i * DCORE_DEC_OFFSET;
7434 dec_swreg15 = RREG32(mmPCIE_DEC0_CMD_SWREG15 + offset);
7435 is_eng_idle = IS_DEC_IDLE(dec_swreg15);
7436 is_idle &= is_eng_idle;
7437
7438 if (mask && !is_eng_idle)
7439 set_bit(engine_idx, mask);
7440
7441 if (e)
7442 hl_engine_data_sprintf(e, pcie_dec_fmt, i,
7443 is_eng_idle ? "Y" : "N", dec_swreg15);
7444 }
7445
7446 return is_idle;
7447 }
7448
7449 static bool gaudi2_get_rotator_idle_status(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7450 struct engines_data *e)
7451 {
7452 const char *rot_fmt = "%-6d%-5d%-9s%#-14x%#-14x%#x\n";
7453 unsigned long *mask = (unsigned long *) mask_arr;
7454 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts;
7455 bool is_idle = true, is_eng_idle;
7456 int engine_idx, i;
7457 u64 offset;
7458
7459 if (e)
7460 hl_engine_data_sprintf(e,
7461 "\nCORE ROT is_idle QM_GLBL_STS0 QM_GLBL_STS1 QM_CGM_STS\n"
7462 "---- --- ------- ------------ ------------ ----------\n");
7463
7464 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7465 engine_idx = GAUDI2_ENGINE_ID_ROT_0 + i;
7466
7467 offset = i * ROT_OFFSET;
7468
7469 qm_glbl_sts0 = RREG32(mmROT0_QM_GLBL_STS0 + offset);
7470 qm_glbl_sts1 = RREG32(mmROT0_QM_GLBL_STS1 + offset);
7471 qm_cgm_sts = RREG32(mmROT0_QM_CGM_STS + offset);
7472
7473 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7474 is_idle &= is_eng_idle;
7475
7476 if (mask && !is_eng_idle)
7477 set_bit(engine_idx, mask);
7478
7479 if (e)
7480 hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? "Y" : "N",
7481 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts);
7482 }
7483
7484 return is_idle;
7485 }
7486
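/*
 * The device is considered idle only if all engine types (EDMA, PDMA, NIC,
 * MME, TPC, decoders and rotators) are idle.
 */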
7487 static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
7488 struct engines_data *e)
7489 {
7490 bool is_idle = true;
7491
7492 is_idle &= gaudi2_get_edma_idle_status(hdev, mask_arr, mask_len, e);
7493 is_idle &= gaudi2_get_pdma_idle_status(hdev, mask_arr, mask_len, e);
7494 is_idle &= gaudi2_get_nic_idle_status(hdev, mask_arr, mask_len, e);
7495 is_idle &= gaudi2_get_mme_idle_status(hdev, mask_arr, mask_len, e);
7496 is_idle &= gaudi2_get_tpc_idle_status(hdev, mask_arr, mask_len, e);
7497 is_idle &= gaudi2_get_decoder_idle_status(hdev, mask_arr, mask_len, e);
7498 is_idle &= gaudi2_get_rotator_idle_status(hdev, mask_arr, mask_len, e);
7499
7500 return is_idle;
7501 }
7502
7503 static void gaudi2_hw_queues_lock(struct hl_device *hdev)
7504 __acquires(&gaudi2->hw_queues_lock)
7505 {
7506 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7507
7508 spin_lock(&gaudi2->hw_queues_lock);
7509 }
7510
7511 static void gaudi2_hw_queues_unlock(struct hl_device *hdev)
7512 __releases(&gaudi2->hw_queues_lock)
7513 {
7514 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7515
7516 spin_unlock(&gaudi2->hw_queues_lock);
7517 }
7518
7519 static u32 gaudi2_get_pci_id(struct hl_device *hdev)
7520 {
7521 return hdev->pdev->device;
7522 }
7523
7524 static int gaudi2_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
7525 {
7526 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7527
7528 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
7529 return 0;
7530
7531 return hl_fw_get_eeprom_data(hdev, data, max_size);
7532 }
7533
7534 static void gaudi2_update_eq_ci(struct hl_device *hdev, u32 val)
7535 {
7536 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
7537 }
7538
7539 static void *gaudi2_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
7540 {
7541 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7542
7543 if (aggregate) {
7544 *size = (u32) sizeof(gaudi2->events_stat_aggregate);
7545 return gaudi2->events_stat_aggregate;
7546 }
7547
7548 *size = (u32) sizeof(gaudi2->events_stat);
7549 return gaudi2->events_stat;
7550 }
7551
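/*
 * Configure the AXUSER ASID and MMU bypass for all bridge interfaces of a
 * single dcore video decoder (the decoder interface and the MSI-X interfaces).
 */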
7552 static void gaudi2_mmu_vdec_dcore_prepare(struct hl_device *hdev, int dcore_id,
7553 int dcore_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7554 {
7555 u32 offset = (mmDCORE0_VDEC1_BRDG_CTRL_BASE - mmDCORE0_VDEC0_BRDG_CTRL_BASE) *
7556 dcore_vdec_id + DCORE_OFFSET * dcore_id;
7557
7558 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7559 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7560
7561 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7562 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7563
7564 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7565 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7566
7567 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7568 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7569
7570 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7571 WREG32(mmDCORE0_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7572 }
7573
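/*
 * Per-dcore MMU preparation for a new context: program the given ASID and
 * clear MMU bypass on the dcore's enabled EDMAs, sync manager, MME ports and
 * QMAN, and enabled decoders.
 */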
7574 static void gaudi2_mmu_dcore_prepare(struct hl_device *hdev, int dcore_id, u32 asid)
7575 {
7576 u32 rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7577 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7578 struct asic_fixed_properties *prop = &hdev->asic_prop;
7579 u32 dcore_offset = dcore_id * DCORE_OFFSET;
7580 u32 vdec_id, i, ports_offset, reg_val;
7581 u8 edma_seq_base;
7582
7583 /* EDMA */
7584 edma_seq_base = dcore_id * NUM_OF_EDMA_PER_DCORE;
7585 if (prop->edma_enabled_mask & BIT(edma_seq_base)) {
7586 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7587 WREG32(mmDCORE0_EDMA0_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7588 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7589 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7590 }
7591
7592 if (prop->edma_enabled_mask & BIT(edma_seq_base + 1)) {
7593 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7594 WREG32(mmDCORE0_EDMA1_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7595 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_ASID + dcore_offset, rw_asid);
7596 WREG32(mmDCORE0_EDMA1_CORE_CTX_AXUSER_HB_MMU_BP + dcore_offset, 0);
7597 }
7598
7599 /* Sync Mngr */
7600 WREG32(mmDCORE0_SYNC_MNGR_GLBL_ASID_NONE_SEC_PRIV + dcore_offset, asid);
7601 /*
7602 	 * Sync Mngrs on dcores 1 - 3 are exposed to the user, so the user ASID
7603 	 * must be used for any access type
7604 */
7605 if (dcore_id > 0) {
7606 reg_val = (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_RD_SHIFT) |
7607 (asid << DCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID_WR_SHIFT);
7608 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_ASID + dcore_offset, reg_val);
7609 WREG32(mmDCORE0_SYNC_MNGR_MSTR_IF_AXUSER_HB_MMU_BP + dcore_offset, 0);
7610 }
7611
7612 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_MMU_BP + dcore_offset, 0);
7613 WREG32(mmDCORE0_MME_CTRL_LO_MME_AXUSER_HB_ASID + dcore_offset, rw_asid);
7614
7615 for (i = 0 ; i < NUM_OF_MME_SBTE_PORTS ; i++) {
7616 ports_offset = i * DCORE_MME_SBTE_OFFSET;
7617 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_MMU_BP +
7618 dcore_offset + ports_offset, 0);
7619 WREG32(mmDCORE0_MME_SBTE0_MSTR_IF_AXUSER_HB_ASID +
7620 dcore_offset + ports_offset, rw_asid);
7621 }
7622
7623 for (i = 0 ; i < NUM_OF_MME_WB_PORTS ; i++) {
7624 ports_offset = i * DCORE_MME_WB_OFFSET;
7625 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_MMU_BP +
7626 dcore_offset + ports_offset, 0);
7627 WREG32(mmDCORE0_MME_WB0_MSTR_IF_AXUSER_HB_ASID +
7628 dcore_offset + ports_offset, rw_asid);
7629 }
7630
7631 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_MMU_BP + dcore_offset, 0);
7632 WREG32(mmDCORE0_MME_QM_AXUSER_NONSECURED_HB_ASID + dcore_offset, rw_asid);
7633
7634 /*
7635 * Decoders
7636 */
7637 for (vdec_id = 0 ; vdec_id < NUM_OF_DEC_PER_DCORE ; vdec_id++) {
7638 if (prop->decoder_enabled_mask & BIT(dcore_id * NUM_OF_DEC_PER_DCORE + vdec_id))
7639 gaudi2_mmu_vdec_dcore_prepare(hdev, dcore_id, vdec_id, rw_asid, 0);
7640 }
7641 }
7642
7643 static void gudi2_mmu_vdec_shared_prepare(struct hl_device *hdev,
7644 int shared_vdec_id, u32 rw_asid, u32 rw_mmu_bp)
7645 {
7646 u32 offset = (mmPCIE_VDEC1_BRDG_CTRL_BASE - mmPCIE_VDEC0_BRDG_CTRL_BASE) * shared_vdec_id;
7647
7648 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_MMU_BP + offset, rw_mmu_bp);
7649 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_DEC_HB_ASID + offset, rw_asid);
7650
7651 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_MMU_BP + offset, rw_mmu_bp);
7652 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_ABNRM_HB_ASID + offset, rw_asid);
7653
7654 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_MMU_BP + offset, rw_mmu_bp);
7655 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_L2C_HB_ASID + offset, rw_asid);
7656
7657 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_MMU_BP + offset, rw_mmu_bp);
7658 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_NRM_HB_ASID + offset, rw_asid);
7659
7660 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_MMU_BP + offset, rw_mmu_bp);
7661 WREG32(mmPCIE_VDEC0_BRDG_CTRL_AXUSER_MSIX_VCD_HB_ASID + offset, rw_asid);
7662 }
7663
7664 static void gudi2_mmu_arc_farm_arc_dup_eng_prepare(struct hl_device *hdev, int arc_farm_id,
7665 u32 rw_asid, u32 rw_mmu_bp)
7666 {
7667 u32 offset = (mmARC_FARM_ARC1_DUP_ENG_BASE - mmARC_FARM_ARC0_DUP_ENG_BASE) * arc_farm_id;
7668
7669 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_MMU_BP + offset, rw_mmu_bp);
7670 WREG32(mmARC_FARM_ARC0_DUP_ENG_AXUSER_HB_ASID + offset, rw_asid);
7671 }
7672
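/*
 * Enable the MMU and configure the ASID for all relevant ARC regions of a
 * single ARC, identified by its CPU id.
 */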
7673 static void gaudi2_arc_mmu_prepare(struct hl_device *hdev, u32 cpu_id, u32 asid)
7674 {
7675 u32 reg_base, reg_offset, reg_val = 0;
7676
7677 reg_base = gaudi2_arc_blocks_bases[cpu_id];
7678
7679 /* Enable MMU and configure asid for all relevant ARC regions */
7680 reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_MMU_BP_MASK, 0);
7681 reg_val |= FIELD_PREP(ARC_FARM_ARC0_AUX_ARC_REGION_CFG_0_ASID_MASK, asid);
7682
7683 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION3_GENERAL);
7684 WREG32(reg_base + reg_offset, reg_val);
7685
7686 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION4_HBM0_FW);
7687 WREG32(reg_base + reg_offset, reg_val);
7688
7689 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION5_HBM1_GC_DATA);
7690 WREG32(reg_base + reg_offset, reg_val);
7691
7692 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION6_HBM2_GC_DATA);
7693 WREG32(reg_base + reg_offset, reg_val);
7694
7695 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION7_HBM3_GC_DATA);
7696 WREG32(reg_base + reg_offset, reg_val);
7697
7698 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION9_PCIE);
7699 WREG32(reg_base + reg_offset, reg_val);
7700
7701 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION10_GENERAL);
7702 WREG32(reg_base + reg_offset, reg_val);
7703
7704 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION11_GENERAL);
7705 WREG32(reg_base + reg_offset, reg_val);
7706
7707 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION12_GENERAL);
7708 WREG32(reg_base + reg_offset, reg_val);
7709
7710 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION13_GENERAL);
7711 WREG32(reg_base + reg_offset, reg_val);
7712
7713 reg_offset = ARC_REGION_CFG_OFFSET(ARC_REGION14_GENERAL);
7714 WREG32(reg_base + reg_offset, reg_val);
7715 }
7716
7717 static int gaudi2_arc_mmu_prepare_all(struct hl_device *hdev, u32 asid)
7718 {
7719 int i;
7720
7721 if (hdev->fw_components & FW_TYPE_BOOT_CPU)
7722 return hl_fw_cpucp_engine_core_asid_set(hdev, asid);
7723
7724 for (i = CPU_ID_SCHED_ARC0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7725 gaudi2_arc_mmu_prepare(hdev, i, asid);
7726
7727 for (i = GAUDI2_QUEUE_ID_PDMA_0_0 ; i < GAUDI2_QUEUE_ID_CPU_PQ ; i += 4) {
7728 if (!gaudi2_is_queue_enabled(hdev, i))
7729 continue;
7730
7731 gaudi2_arc_mmu_prepare(hdev, gaudi2_queue_id_to_arc_id[i], asid);
7732 }
7733
7734 return 0;
7735 }
7736
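/*
 * Prepare the MMU configuration of the shared (non-dcore) blocks for a new
 * context: PDMAs, rotators, shared PCIe decoders, ARC farm DUP engines and
 * the engine ARCs.
 */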
7737 static int gaudi2_mmu_shared_prepare(struct hl_device *hdev, u32 asid)
7738 {
7739 struct asic_fixed_properties *prop = &hdev->asic_prop;
7740 u32 rw_asid, offset;
7741 int rc, i;
7742
7743 rw_asid = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_MASK, asid) |
7744 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_MASK, asid);
7745
7746 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7747 WREG32(mmPDMA0_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7748 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7749 WREG32(mmPDMA0_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7750
7751 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_ASID, rw_asid);
7752 WREG32(mmPDMA1_QM_AXUSER_NONSECURED_HB_MMU_BP, 0);
7753 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_ASID, rw_asid);
7754 WREG32(mmPDMA1_CORE_CTX_AXUSER_HB_MMU_BP, 0);
7755
7756 /* ROT */
7757 for (i = 0 ; i < NUM_OF_ROT ; i++) {
7758 offset = i * ROT_OFFSET;
7759 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_ASID + offset, rw_asid);
7760 WREG32(mmROT0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7761 RMWREG32(mmROT0_CPL_QUEUE_AWUSER + offset, asid, MMUBP_ASID_MASK);
7762 RMWREG32(mmROT0_DESC_HBW_ARUSER_LO + offset, asid, MMUBP_ASID_MASK);
7763 RMWREG32(mmROT0_DESC_HBW_AWUSER_LO + offset, asid, MMUBP_ASID_MASK);
7764 }
7765
7766 /* Shared Decoders are the last bits in the decoders mask */
7767 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 0))
7768 gudi2_mmu_vdec_shared_prepare(hdev, 0, rw_asid, 0);
7769
7770 if (prop->decoder_enabled_mask & BIT(NUM_OF_DCORES * NUM_OF_DEC_PER_DCORE + 1))
7771 gudi2_mmu_vdec_shared_prepare(hdev, 1, rw_asid, 0);
7772
7773 /* arc farm arc dup eng */
7774 for (i = 0 ; i < NUM_OF_ARC_FARMS_ARC ; i++)
7775 gudi2_mmu_arc_farm_arc_dup_eng_prepare(hdev, i, rw_asid, 0);
7776
7777 rc = gaudi2_arc_mmu_prepare_all(hdev, asid);
7778 if (rc)
7779 return rc;
7780
7781 return 0;
7782 }
7783
7784 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset,
7785 struct iterate_module_ctx *ctx)
7786 {
7787 struct gaudi2_tpc_mmu_data *mmu_data = ctx->data;
7788
7789 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0);
7790 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid);
7791 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_MMU_BP + offset, 0);
7792 WREG32(mmDCORE0_TPC0_QM_AXUSER_NONSECURED_HB_ASID + offset, mmu_data->rw_asid);
7793 }
7794
7795 /* zero the MMUBP and set the ASID */
7796 static int gaudi2_mmu_prepare(struct hl_device *hdev, u32 asid)
7797 {
7798 struct gaudi2_device *gaudi2 = hdev->asic_specific;
7799 struct gaudi2_tpc_mmu_data tpc_mmu_data;
7800 struct iterate_module_ctx tpc_iter = {
7801 .fn = &gaudi2_tpc_mmu_prepare,
7802 .data = &tpc_mmu_data,
7803 };
7804 int rc, i;
7805
7806 if (asid & ~DCORE0_HMMU0_STLB_ASID_ASID_MASK) {
7807 dev_crit(hdev->dev, "asid %u is too big\n", asid);
7808 return -EINVAL;
7809 }
7810
7811 if (!(gaudi2->hw_cap_initialized & HW_CAP_MMU_MASK))
7812 return 0;
7813
7814 rc = gaudi2_mmu_shared_prepare(hdev, asid);
7815 if (rc)
7816 return rc;
7817
7818 /* configure DCORE MMUs */
7819 tpc_mmu_data.rw_asid = (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_RD_SHIFT) |
7820 (asid << ARC_FARM_KDMA_CTX_AXUSER_HB_ASID_WR_SHIFT);
7821 gaudi2_iterate_tpcs(hdev, &tpc_iter);
7822 for (i = 0 ; i < NUM_OF_DCORES ; i++)
7823 gaudi2_mmu_dcore_prepare(hdev, i, asid);
7824
7825 return 0;
7826 }
7827
7828 static inline bool is_info_event(u32 event)
7829 {
7830 switch (event) {
7831 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
7832 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
7833 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY ... GAUDI2_EVENT_ARC_PWR_RD_MODE3:
7834
7835 	/* Return in case of a NIC status event - these events are received periodically
7836 	 * and are not an indication of an error.
7837 	 */
7838 case GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0 ... GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1:
7839 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
7840 return true;
7841 default:
7842 return false;
7843 }
7844 }
7845
7846 static void gaudi2_print_event(struct hl_device *hdev, u16 event_type,
7847 bool ratelimited, const char *fmt, ...)
7848 {
7849 struct va_format vaf;
7850 va_list args;
7851
7852 va_start(args, fmt);
7853 vaf.fmt = fmt;
7854 vaf.va = &args;
7855
7856 if (ratelimited)
7857 dev_err_ratelimited(hdev->dev, "%s: %pV\n",
7858 gaudi2_irq_map_table[event_type].valid ?
7859 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7860 else
7861 dev_err(hdev->dev, "%s: %pV\n",
7862 gaudi2_irq_map_table[event_type].valid ?
7863 gaudi2_irq_map_table[event_type].name : "N/A Event", &vaf);
7864
7865 va_end(args);
7866 }
7867
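/*
 * Report an ECC error taken from the EQ entry. Firmware 1.12 and later also
 * provides the block id. Returns true if the error is critical.
 */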
7868 static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type,
7869 struct hl_eq_ecc_data *ecc_data)
7870 {
7871 u64 ecc_address = 0, ecc_syndrome = 0;
7872 u8 memory_wrapper_idx = 0;
7873 bool has_block_id = false;
7874 u16 block_id;
7875
7876 if (hl_fw_version_cmp(hdev, 1, 12, 0) >= 0)
7877 has_block_id = true;
7878
7879 ecc_address = le64_to_cpu(ecc_data->ecc_address);
7880 ecc_syndrome = le64_to_cpu(ecc_data->ecc_syndrom);
7881 memory_wrapper_idx = ecc_data->memory_wrapper_idx;
7882
7883 if (has_block_id) {
7884 block_id = le16_to_cpu(ecc_data->block_id);
7885 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7886 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. block id %#x. critical %u.",
7887 ecc_address, ecc_syndrome, memory_wrapper_idx, block_id,
7888 ecc_data->is_critical);
7889 } else {
7890 gaudi2_print_event(hdev, event_type, !ecc_data->is_critical,
7891 "ECC error detected. address: %#llx. Syndrome: %#llx. wrapper id %u. critical %u.",
7892 ecc_address, ecc_syndrome, memory_wrapper_idx, ecc_data->is_critical);
7893 }
7894
7895 return !!ecc_data->is_critical;
7896 }
7897
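/*
 * On a lower-QM (lower CP) error, dump the current CQ pointer/size and the
 * current CP instruction, and capture them in the undefined-opcode info if
 * capturing is enabled. The QMAN may be served by either the legacy CQ or the
 * ARC CQ, so first check which one is currently in use.
 */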
7898 static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id)
7899 {
7900 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode;
7901 u64 cq_ptr, cp_current_inst;
7902 u32 lo, hi, cq_size, cp_sts;
7903 bool is_arc_cq;
7904
7905 cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET);
7906 is_arc_cq = FIELD_GET(PDMA0_QM_CP_STS_CUR_CQ_MASK, cp_sts); /* 0 - legacy CQ, 1 - ARC_CQ */
7907
7908 if (is_arc_cq) {
7909 lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET);
7910 hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET);
7911 cq_ptr = ((u64) hi) << 32 | lo;
7912 cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET);
7913 } else {
7914 lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET);
7915 hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET);
7916 cq_ptr = ((u64) hi) << 32 | lo;
7917 cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET);
7918 }
7919
7920 lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET);
7921 hi = RREG32(qman_base + QM_CP_CURRENT_INST_HI_4_OFFSET);
7922 cp_current_inst = ((u64) hi) << 32 | lo;
7923
7924 dev_info(hdev->dev,
7925 "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n",
7926 is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst);
7927
7928 if (undef_opcode->write_enable) {
7929 memset(undef_opcode, 0, sizeof(*undef_opcode));
7930 undef_opcode->timestamp = ktime_get();
7931 undef_opcode->cq_addr = cq_ptr;
7932 undef_opcode->cq_size = cq_size;
7933 undef_opcode->engine_id = engine_id;
7934 undef_opcode->stream_id = QMAN_STREAMS;
7935 undef_opcode->write_enable = 0;
7936 }
7937 }
7938
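/*
 * Generic QMAN error handler: decode the GLBL_ERR_STS register of each upper
 * CP stream and of the lower CP, report the error causes, handle an undefined
 * opcode error in the lower QM, and decode the arbiter error cause register.
 * Returns the number of errors found.
 */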
7939 static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type,
7940 u64 qman_base, u32 qid_base, u64 *event_mask)
7941 {
7942 u32 i, j, glbl_sts_val, arb_err_val, num_error_causes, error_count = 0;
7943 u64 glbl_sts_addr, arb_err_addr;
7944 char reg_desc[32];
7945
7946 glbl_sts_addr = qman_base + (mmDCORE0_TPC0_QM_GLBL_ERR_STS_0 - mmDCORE0_TPC0_QM_BASE);
7947 arb_err_addr = qman_base + (mmDCORE0_TPC0_QM_ARB_ERR_CAUSE - mmDCORE0_TPC0_QM_BASE);
7948
7949 /* Iterate through all stream GLBL_ERR_STS registers + Lower CP */
7950 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
7951 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
7952
7953 if (!glbl_sts_val)
7954 continue;
7955
7956 if (i == QMAN_STREAMS) {
7957 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerQM");
7958 num_error_causes = GAUDI2_NUM_OF_LOWER_QM_ERR_CAUSE;
7959 } else {
7960 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
7961 num_error_causes = GAUDI2_NUM_OF_QM_ERR_CAUSE;
7962 }
7963
7964 for (j = 0 ; j < num_error_causes ; j++)
7965 if (glbl_sts_val & BIT(j)) {
7966 gaudi2_print_event(hdev, event_type, true,
7967 "%s. err cause: %s", reg_desc,
7968 i == QMAN_STREAMS ?
7969 gaudi2_lower_qman_error_cause[j] :
7970 gaudi2_qman_error_cause[j]);
7971 error_count++;
7972 }
7973
7974 /* Check for undefined opcode error in lower QM */
7975 if ((i == QMAN_STREAMS) &&
7976 (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) {
7977 handle_lower_qman_data_on_err(hdev, qman_base,
7978 gaudi2_queue_id_to_engine_id[qid_base]);
7979 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
7980 }
7981 }
7982
7983 arb_err_val = RREG32(arb_err_addr);
7984
7985 if (!arb_err_val)
7986 goto out;
7987
7988 for (j = 0 ; j < GAUDI2_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
7989 if (arb_err_val & BIT(j)) {
7990 gaudi2_print_event(hdev, event_type, true,
7991 "ARB_ERR. err cause: %s",
7992 gaudi2_qman_arb_error_cause[j]);
7993 error_count++;
7994 }
7995 }
7996
7997 out:
7998 return error_count;
7999 }
8000
8001 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
8002 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
8003 enum gaudi2_engine_id id, u64 *event_mask)
8004 {
8005 u32 razwi_hi, razwi_lo, razwi_xy;
8006 u16 eng_id = id;
8007 u8 rd_wr_flag;
8008
8009 if (is_write) {
8010 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HI);
8011 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_LO);
8012 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_XY);
8013 rd_wr_flag = HL_RAZWI_WRITE;
8014 } else {
8015 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
8016 razwi_lo = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_LO);
8017 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_XY);
8018 rd_wr_flag = HL_RAZWI_READ;
8019 }
8020
8021 hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
8022 rd_wr_flag | HL_RAZWI_HBW, event_mask);
8023
8024 dev_err_ratelimited(hdev->dev,
8025 "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
8026 name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
8027 }
8028
8029 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
8030 u64 rtr_mstr_if_base_addr, bool is_write, char *name,
8031 enum gaudi2_engine_id id, u64 *event_mask)
8032 {
8033 u64 razwi_addr = CFG_BASE;
8034 u32 razwi_xy;
8035 u16 eng_id = id;
8036 u8 rd_wr_flag;
8037
8038 if (is_write) {
8039 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI);
8040 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_XY);
8041 rd_wr_flag = HL_RAZWI_WRITE;
8042 } else {
8043 razwi_addr += RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
8044 razwi_xy = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_XY);
8045 rd_wr_flag = HL_RAZWI_READ;
8046 }
8047
8048 hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
8049 dev_err_ratelimited(hdev->dev,
8050 "%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%llX Initiator coordinates 0x%x\n",
8051 name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
8052 razwi_xy);
8053 }
8054
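/* Translate a RAZWI initiator (module type and index) to its global engine id */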
8055 static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
8056 enum razwi_event_sources module, u8 module_idx)
8057 {
8058 switch (module) {
8059 case RAZWI_TPC:
8060 if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
8061 return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
8062 return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8063 (module_idx % NUM_OF_TPC_PER_DCORE) +
8064 (GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8065
8066 case RAZWI_MME:
8067 return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
8068 (module_idx * ENGINE_ID_DCORE_OFFSET));
8069
8070 case RAZWI_EDMA:
8071 return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8072 (module_idx % NUM_OF_EDMA_PER_DCORE));
8073
8074 case RAZWI_PDMA:
8075 return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
8076
8077 case RAZWI_NIC:
8078 return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
8079
8080 case RAZWI_DEC:
8081 if (module_idx == 8)
8082 return GAUDI2_PCIE_ENGINE_ID_DEC_0;
8083
8084 if (module_idx == 9)
8085 return GAUDI2_PCIE_ENGINE_ID_DEC_1;
8086
8087 return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
8088 (module_idx % NUM_OF_DEC_PER_DCORE) +
8089 (GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
8090
8091 case RAZWI_ROT:
8092 return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
8093
8094 case RAZWI_ARC_FARM:
8095 return GAUDI2_ENGINE_ID_ARC_FARM;
8096
8097 default:
8098 return GAUDI2_ENGINE_ID_SIZE;
8099 }
8100 }
8101
8102 /*
8103  * This function handles RR (Range Register) hit events
8104  * raised by initiators, not PSOC RAZWI.
8105 */
8106 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
8107 enum razwi_event_sources module, u8 module_idx,
8108 u8 module_sub_idx, u64 *event_mask)
8109 {
8110 bool via_sft = false;
8111 u32 hbw_rtr_id, lbw_rtr_id, dcore_id, dcore_rtr_id, eng_id, binned_idx;
8112 u64 hbw_rtr_mstr_if_base_addr, lbw_rtr_mstr_if_base_addr;
8113 u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
8114 u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
8115 char initiator_name[64];
8116
8117 switch (module) {
8118 case RAZWI_TPC:
8119 sprintf(initiator_name, "TPC_%u", module_idx);
8120 if (hdev->tpc_binning) {
8121 binned_idx = __ffs(hdev->tpc_binning);
8122 if (binned_idx == module_idx)
8123 module_idx = TPC_ID_DCORE0_TPC6;
8124 }
8125
8126 hbw_rtr_id = gaudi2_tpc_initiator_hbw_rtr_id[module_idx];
8127 lbw_rtr_id = gaudi2_tpc_initiator_lbw_rtr_id[module_idx];
8128 break;
8129 case RAZWI_MME:
8130 sprintf(initiator_name, "MME_%u", module_idx);
8131 switch (module_sub_idx) {
8132 case MME_WAP0:
8133 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap0;
8134 break;
8135 case MME_WAP1:
8136 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].wap1;
8137 break;
8138 case MME_WRITE:
8139 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].write;
8140 break;
8141 case MME_READ:
8142 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].read;
8143 break;
8144 case MME_SBTE0:
8145 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte0;
8146 break;
8147 case MME_SBTE1:
8148 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte1;
8149 break;
8150 case MME_SBTE2:
8151 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte2;
8152 break;
8153 case MME_SBTE3:
8154 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte3;
8155 break;
8156 case MME_SBTE4:
8157 hbw_rtr_id = gaudi2_mme_initiator_rtr_id[module_idx].sbte4;
8158 break;
8159 default:
8160 return;
8161 }
8162 lbw_rtr_id = hbw_rtr_id;
8163 break;
8164 case RAZWI_EDMA:
8165 hbw_rtr_mstr_if_base_addr = gaudi2_edma_initiator_hbw_sft[module_idx];
8166 dcore_id = module_idx / NUM_OF_EDMA_PER_DCORE;
8167 		/* The SFT has a separate MSTR_IF for LBW; only there can we
8168 		 * read the LBW RAZWI related registers.
8169 		 */
8170 lbw_rtr_mstr_if_base_addr = mmSFT0_LBW_RTR_IF_MSTR_IF_RR_SHRD_HBW_BASE +
8171 dcore_id * SFT_DCORE_OFFSET;
8172 via_sft = true;
8173 sprintf(initiator_name, "EDMA_%u", module_idx);
8174 break;
8175 case RAZWI_PDMA:
8176 hbw_rtr_id = gaudi2_pdma_initiator_hbw_rtr_id[module_idx];
8177 lbw_rtr_id = gaudi2_pdma_initiator_lbw_rtr_id[module_idx];
8178 sprintf(initiator_name, "PDMA_%u", module_idx);
8179 break;
8180 case RAZWI_NIC:
8181 hbw_rtr_id = gaudi2_nic_initiator_hbw_rtr_id[module_idx];
8182 lbw_rtr_id = gaudi2_nic_initiator_lbw_rtr_id[module_idx];
8183 sprintf(initiator_name, "NIC_%u", module_idx);
8184 break;
8185 case RAZWI_DEC:
8186 sprintf(initiator_name, "DEC_%u", module_idx);
8187 if (hdev->decoder_binning) {
8188 binned_idx = __ffs(hdev->decoder_binning);
8189 if (binned_idx == module_idx)
8190 module_idx = DEC_ID_PCIE_VDEC1;
8191 }
8192 hbw_rtr_id = gaudi2_dec_initiator_hbw_rtr_id[module_idx];
8193 lbw_rtr_id = gaudi2_dec_initiator_lbw_rtr_id[module_idx];
8194 break;
8195 case RAZWI_ROT:
8196 hbw_rtr_id = gaudi2_rot_initiator_hbw_rtr_id[module_idx];
8197 lbw_rtr_id = gaudi2_rot_initiator_lbw_rtr_id[module_idx];
8198 sprintf(initiator_name, "ROT_%u", module_idx);
8199 break;
8200 case RAZWI_ARC_FARM:
8201 lbw_rtr_id = DCORE1_RTR5;
8202 hbw_rtr_id = DCORE1_RTR7;
8203 sprintf(initiator_name, "ARC_FARM_%u", module_idx);
8204 break;
8205 default:
8206 return;
8207 }
8208
8209 /* Find router mstr_if register base */
8210 if (!via_sft) {
8211 dcore_id = hbw_rtr_id / NUM_OF_RTR_PER_DCORE;
8212 dcore_rtr_id = hbw_rtr_id % NUM_OF_RTR_PER_DCORE;
8213 hbw_rtr_mstr_if_base_addr = mmDCORE0_RTR0_CTRL_BASE +
8214 dcore_id * DCORE_OFFSET +
8215 dcore_rtr_id * DCORE_RTR_OFFSET +
8216 RTR_MSTR_IF_OFFSET;
8217 lbw_rtr_mstr_if_base_addr = hbw_rtr_mstr_if_base_addr +
8218 (((s32)lbw_rtr_id - hbw_rtr_id) * DCORE_RTR_OFFSET);
8219 }
8220
8221 /* Find out event cause by reading "RAZWI_HAPPENED" registers */
8222 hbw_shrd_aw = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED);
8223 hbw_shrd_ar = RREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED);
8224 lbw_shrd_aw = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED);
8225 lbw_shrd_ar = RREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED);
8226
8227 eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
8228 if (hbw_shrd_aw) {
8229 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, true,
8230 initiator_name, eng_id, event_mask);
8231
8232 /* Clear event indication */
8233 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED, hbw_shrd_aw);
8234 }
8235
8236 if (hbw_shrd_ar) {
8237 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, hbw_rtr_mstr_if_base_addr, false,
8238 initiator_name, eng_id, event_mask);
8239
8240 /* Clear event indication */
8241 WREG32(hbw_rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED, hbw_shrd_ar);
8242 }
8243
8244 if (lbw_shrd_aw) {
8245 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, true,
8246 initiator_name, eng_id, event_mask);
8247
8248 /* Clear event indication */
8249 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED, lbw_shrd_aw);
8250 }
8251
8252 if (lbw_shrd_ar) {
8253 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, lbw_rtr_mstr_if_base_addr, false,
8254 initiator_name, eng_id, event_mask);
8255
8256 /* Clear event indication */
8257 WREG32(lbw_rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED, lbw_shrd_ar);
8258 }
8259 }
8260
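/*
 * Check all possible RAZWI initiators (TPCs, MMEs, EDMAs, PDMAs, NICs,
 * decoders and rotators) for a pending RAZWI indication.
 */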
8261 static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
8262 {
8263 struct asic_fixed_properties *prop = &hdev->asic_prop;
8264 u8 mod_idx, sub_mod;
8265
8266 /* check all TPCs */
8267 for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
8268 if (prop->tpc_enabled_mask & BIT(mod_idx))
8269 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
8270 }
8271
8272 /* check all MMEs */
8273 for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8274 for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
8275 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
8276 sub_mod, NULL);
8277
8278 /* check all EDMAs */
8279 for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
8280 if (prop->edma_enabled_mask & BIT(mod_idx))
8281 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
8282
8283 /* check all PDMAs */
8284 for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
8285 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
8286
8287 /* check all NICs */
8288 for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
8289 if (hdev->nic_ports_mask & BIT(mod_idx))
8290 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
8291 NULL);
8292
8293 /* check all DECs */
8294 for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
8295 if (prop->decoder_enabled_mask & BIT(mod_idx))
8296 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
8297
8298 /* check all ROTs */
8299 for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
8300 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
8301 }
8302
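/*
 * Find all engines whose AXUSER X/Y coordinates match the captured RAZWI
 * info. Returns the number of matching engines and fills in their engine ids,
 * router control bases and a printable name string.
 */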
8303 static int gaudi2_psoc_razwi_get_engines(struct gaudi2_razwi_info *razwi_info, u32 array_size,
8304 u32 axuser_xy, u32 *base, u16 *eng_id,
8305 char *eng_name)
8306 {
8307
8308 int i, num_of_eng = 0;
8309 u16 str_size = 0;
8310
8311 for (i = 0 ; i < array_size ; i++) {
8312 if (axuser_xy != razwi_info[i].axuser_xy)
8313 continue;
8314
8315 eng_id[num_of_eng] = razwi_info[i].eng_id;
8316 base[num_of_eng] = razwi_info[i].rtr_ctrl;
8317 if (!num_of_eng)
8318 str_size += scnprintf(eng_name + str_size,
8319 PSOC_RAZWI_ENG_STR_SIZE - str_size, "%s",
8320 razwi_info[i].eng_name);
8321 else
8322 str_size += scnprintf(eng_name + str_size,
8323 PSOC_RAZWI_ENG_STR_SIZE - str_size, " or %s",
8324 razwi_info[i].eng_name);
8325 num_of_eng++;
8326 }
8327
8328 return num_of_eng;
8329 }
8330
8331 static bool gaudi2_handle_psoc_razwi_happened(struct hl_device *hdev, u32 razwi_reg,
8332 u64 *event_mask)
8333 {
8334 u32 axuser_xy = RAZWI_GET_AXUSER_XY(razwi_reg), addr_hi = 0, addr_lo = 0;
8335 u32 base[PSOC_RAZWI_MAX_ENG_PER_RTR];
8336 u16 num_of_eng, eng_id[PSOC_RAZWI_MAX_ENG_PER_RTR];
8337 char eng_name_str[PSOC_RAZWI_ENG_STR_SIZE];
8338 bool razwi_happened = false;
8339 u64 addr;
8340 int i;
8341
8342 num_of_eng = gaudi2_psoc_razwi_get_engines(common_razwi_info, ARRAY_SIZE(common_razwi_info),
8343 axuser_xy, base, eng_id, eng_name_str);
8344
8345 /* If no match for XY coordinates, try to find it in MME razwi table */
8346 if (!num_of_eng) {
8347 axuser_xy = RAZWI_GET_AXUSER_LOW_XY(razwi_reg);
8348 num_of_eng = gaudi2_psoc_razwi_get_engines(mme_razwi_info,
8349 ARRAY_SIZE(mme_razwi_info),
8350 axuser_xy, base, eng_id,
8351 eng_name_str);
8352 }
8353
8354 for (i = 0 ; i < num_of_eng ; i++) {
8355 if (RREG32(base[i] + DEC_RAZWI_HBW_AW_SET)) {
8356 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_HI);
8357 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AW_ADDR_LO);
8358 addr = ((u64)addr_hi << 32) + addr_lo;
8359 if (addr) {
8360 dev_err(hdev->dev,
8361 "PSOC HBW AW RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8362 eng_name_str, addr);
8363 hl_handle_razwi(hdev, addr, &eng_id[0],
8364 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_WRITE, event_mask);
8365 razwi_happened = true;
8366 }
8367 }
8368
8369 if (RREG32(base[i] + DEC_RAZWI_HBW_AR_SET)) {
8370 addr_hi = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_HI);
8371 addr_lo = RREG32(base[i] + DEC_RAZWI_HBW_AR_ADDR_LO);
8372 addr = ((u64)addr_hi << 32) + addr_lo;
8373 if (addr) {
8374 dev_err(hdev->dev,
8375 "PSOC HBW AR RAZWI: %s, address (aligned to 128 byte): 0x%llX\n",
8376 eng_name_str, addr);
8377 hl_handle_razwi(hdev, addr, &eng_id[0],
8378 num_of_eng, HL_RAZWI_HBW | HL_RAZWI_READ, event_mask);
8379 razwi_happened = true;
8380 }
8381 }
8382
8383 if (RREG32(base[i] + DEC_RAZWI_LBW_AW_SET)) {
8384 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AW_ADDR);
8385 if (addr_lo) {
8386 dev_err(hdev->dev,
8387 "PSOC LBW AW RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8388 eng_name_str, addr_lo);
8389 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8390 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_WRITE, event_mask);
8391 razwi_happened = true;
8392 }
8393 }
8394
8395 if (RREG32(base[i] + DEC_RAZWI_LBW_AR_SET)) {
8396 addr_lo = RREG32(base[i] + DEC_RAZWI_LBW_AR_ADDR);
8397 if (addr_lo) {
8398 dev_err(hdev->dev,
8399 "PSOC LBW AR RAZWI: %s, address (aligned to 128 byte): 0x%X\n",
8400 eng_name_str, addr_lo);
8401 hl_handle_razwi(hdev, addr_lo, &eng_id[0],
8402 num_of_eng, HL_RAZWI_LBW | HL_RAZWI_READ, event_mask);
8403 razwi_happened = true;
8404 }
8405 }
8406 		/* In the common case the loop breaks here, since there is either a single
8407 		 * engine id or several engines sharing the same router. The exception is a
8408 		 * PSOC RAZWI from EDMA, where the AXUSER id may match two routers (the two
8409 		 * interfaces of the SFT router). In that case the first router may not hold
8410 		 * the info and we need to iterate over the other router as well.
8411 		 */
8412 if (razwi_happened)
8413 break;
8414 }
8415
8416 return razwi_happened;
8417 }
8418
8419 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
8420 static int gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
8421 {
8422 u32 razwi_mask_info, razwi_intr = 0, error_count = 0;
8423
8424 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX)) {
8425 razwi_intr = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT);
8426 if (!razwi_intr)
8427 return 0;
8428 }
8429
8430 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO);
8431
8432 dev_err_ratelimited(hdev->dev,
8433 "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n",
8434 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info),
8435 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info),
8436 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info),
8437 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info),
8438 FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info));
8439
8440 if (gaudi2_handle_psoc_razwi_happened(hdev, razwi_mask_info, event_mask))
8441 error_count++;
8442 else
8443 dev_err_ratelimited(hdev->dev,
8444 "PSOC RAZWI interrupt: invalid razwi info (0x%x)\n",
8445 razwi_mask_info);
8446
8447 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
8448 if (hdev->pldm || !(hdev->fw_components & FW_TYPE_LINUX))
8449 WREG32(mmPSOC_GLOBAL_CONF_RAZWI_INTERRUPT, razwi_intr);
8450
8451 return error_count;
8452 }
8453
8454 static int _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base, u16 event_type)
8455 {
8456 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8457
8458 sts_val = RREG32(qman_base + QM_SEI_STATUS_OFFSET);
8459
8460 for (i = 0 ; i < GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE ; i++) {
8461 if (sts_val & BIT(i)) {
8462 gaudi2_print_event(hdev, event_type, true,
8463 "err cause: %s", gaudi2_qm_sei_error_cause[i]);
8464 sts_clr_val |= BIT(i);
8465 error_count++;
8466 }
8467 }
8468
8469 WREG32(qman_base + QM_SEI_STATUS_OFFSET, sts_clr_val);
8470
8471 return error_count;
8472 }
8473
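/*
 * Handle a QM SEI (AXI error response) event: locate the QMAN base of the
 * reporting engine, decode and clear its SEI status and, if requested, check
 * for a related RAZWI and for global errors.
 */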
8474 static int gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
8475 bool extended_err_check, u64 *event_mask)
8476 {
8477 enum razwi_event_sources module;
8478 u32 error_count = 0;
8479 u64 qman_base;
8480 u8 index;
8481
8482 switch (event_type) {
8483 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC23_AXI_ERR_RSP:
8484 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
8485 qman_base = mmDCORE0_TPC0_QM_BASE +
8486 (index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
8487 (index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
8488 module = RAZWI_TPC;
8489 break;
8490 case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
8491 qman_base = mmDCORE0_TPC6_QM_BASE;
8492 module = RAZWI_TPC;
8493 break;
8494 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
8495 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
8496 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
8497 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
8498 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
8499 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
8500 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
8501 qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
8502 module = RAZWI_MME;
8503 break;
8504 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
8505 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
8506 index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
8507 qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
8508 module = RAZWI_PDMA;
8509 break;
8510 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
8511 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
8512 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
8513 qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
8514 module = RAZWI_ROT;
8515 break;
8516 default:
8517 return 0;
8518 }
8519
8520 error_count = _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8521
8522 	/* There is a single event per NIC macro, so both of its QMAN blocks should be checked */
8523 if (event_type >= GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE &&
8524 event_type <= GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE)
8525 error_count += _gaudi2_handle_qm_sei_err(hdev,
8526 qman_base + NIC_QM_OFFSET, event_type);
8527
8528 if (extended_err_check) {
8529 /* check if RAZWI happened */
8530 gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, event_mask);
8531 hl_check_for_glbl_errors(hdev);
8532 }
8533
8534 return error_count;
8535 }
8536
8537 static int gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8538 {
8539 u32 qid_base, error_count = 0;
8540 u64 qman_base;
8541 u8 index = 0;
8542
8543 switch (event_type) {
8544 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC5_QM:
8545 index = event_type - GAUDI2_EVENT_TPC0_QM;
8546 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 + index * QMAN_STREAMS;
8547 qman_base = mmDCORE0_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8548 break;
8549 case GAUDI2_EVENT_TPC6_QM ... GAUDI2_EVENT_TPC11_QM:
8550 index = event_type - GAUDI2_EVENT_TPC6_QM;
8551 qid_base = GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 + index * QMAN_STREAMS;
8552 qman_base = mmDCORE1_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8553 break;
8554 case GAUDI2_EVENT_TPC12_QM ... GAUDI2_EVENT_TPC17_QM:
8555 index = event_type - GAUDI2_EVENT_TPC12_QM;
8556 qid_base = GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 + index * QMAN_STREAMS;
8557 qman_base = mmDCORE2_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8558 break;
8559 case GAUDI2_EVENT_TPC18_QM ... GAUDI2_EVENT_TPC23_QM:
8560 index = event_type - GAUDI2_EVENT_TPC18_QM;
8561 qid_base = GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 + index * QMAN_STREAMS;
8562 qman_base = mmDCORE3_TPC0_QM_BASE + index * DCORE_TPC_OFFSET;
8563 break;
8564 case GAUDI2_EVENT_TPC24_QM:
8565 qid_base = GAUDI2_QUEUE_ID_DCORE0_TPC_6_0;
8566 qman_base = mmDCORE0_TPC6_QM_BASE;
8567 break;
8568 case GAUDI2_EVENT_MME0_QM:
8569 qid_base = GAUDI2_QUEUE_ID_DCORE0_MME_0_0;
8570 qman_base = mmDCORE0_MME_QM_BASE;
8571 break;
8572 case GAUDI2_EVENT_MME1_QM:
8573 qid_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
8574 qman_base = mmDCORE1_MME_QM_BASE;
8575 break;
8576 case GAUDI2_EVENT_MME2_QM:
8577 qid_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
8578 qman_base = mmDCORE2_MME_QM_BASE;
8579 break;
8580 case GAUDI2_EVENT_MME3_QM:
8581 qid_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
8582 qman_base = mmDCORE3_MME_QM_BASE;
8583 break;
8584 case GAUDI2_EVENT_HDMA0_QM:
8585 index = 0;
8586 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0;
8587 qman_base = mmDCORE0_EDMA0_QM_BASE;
8588 break;
8589 case GAUDI2_EVENT_HDMA1_QM:
8590 index = 1;
8591 qid_base = GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0;
8592 qman_base = mmDCORE0_EDMA1_QM_BASE;
8593 break;
8594 case GAUDI2_EVENT_HDMA2_QM:
8595 index = 2;
8596 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0;
8597 qman_base = mmDCORE1_EDMA0_QM_BASE;
8598 break;
8599 case GAUDI2_EVENT_HDMA3_QM:
8600 index = 3;
8601 qid_base = GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0;
8602 qman_base = mmDCORE1_EDMA1_QM_BASE;
8603 break;
8604 case GAUDI2_EVENT_HDMA4_QM:
8605 index = 4;
8606 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0;
8607 qman_base = mmDCORE2_EDMA0_QM_BASE;
8608 break;
8609 case GAUDI2_EVENT_HDMA5_QM:
8610 index = 5;
8611 qid_base = GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0;
8612 qman_base = mmDCORE2_EDMA1_QM_BASE;
8613 break;
8614 case GAUDI2_EVENT_HDMA6_QM:
8615 index = 6;
8616 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0;
8617 qman_base = mmDCORE3_EDMA0_QM_BASE;
8618 break;
8619 case GAUDI2_EVENT_HDMA7_QM:
8620 index = 7;
8621 qid_base = GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0;
8622 qman_base = mmDCORE3_EDMA1_QM_BASE;
8623 break;
8624 case GAUDI2_EVENT_PDMA0_QM:
8625 qid_base = GAUDI2_QUEUE_ID_PDMA_0_0;
8626 qman_base = mmPDMA0_QM_BASE;
8627 break;
8628 case GAUDI2_EVENT_PDMA1_QM:
8629 qid_base = GAUDI2_QUEUE_ID_PDMA_1_0;
8630 qman_base = mmPDMA1_QM_BASE;
8631 break;
8632 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
8633 qid_base = GAUDI2_QUEUE_ID_ROT_0_0;
8634 qman_base = mmROT0_QM_BASE;
8635 break;
8636 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
8637 qid_base = GAUDI2_QUEUE_ID_ROT_1_0;
8638 qman_base = mmROT1_QM_BASE;
8639 break;
8640 default:
8641 return 0;
8642 }
8643
8644 error_count = gaudi2_handle_qman_err_generic(hdev, event_type, qman_base,
8645 qid_base, event_mask);
8646
8647 /* Handle EDMA QM SEI here because there is no AXI error response event for EDMA */
8648 if (event_type >= GAUDI2_EVENT_HDMA2_QM && event_type <= GAUDI2_EVENT_HDMA5_QM) {
8649 error_count += _gaudi2_handle_qm_sei_err(hdev, qman_base, event_type);
8650 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, index, 0, event_mask);
8651 }
8652
8653 hl_check_for_glbl_errors(hdev);
8654
8655 return error_count;
8656 }
8657
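/*
 * Scan the SEI interrupt status of every ARC-farm ARC, print and clear the
 * reported causes, then acknowledge a possible ARC-farm RAZWI.
 */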
8658 static int gaudi2_handle_arc_farm_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
8659 {
8660 u32 i, sts_val, sts_clr_val, error_count = 0, arc_farm;
8661
8662 for (arc_farm = 0 ; arc_farm < NUM_OF_ARC_FARMS_ARC ; arc_farm++) {
8663 sts_clr_val = 0;
8664 sts_val = RREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_STS +
8665 (arc_farm * ARC_FARM_OFFSET));
8666
8667 for (i = 0 ; i < GAUDI2_NUM_OF_ARC_SEI_ERR_CAUSE ; i++) {
8668 if (sts_val & BIT(i)) {
8669 gaudi2_print_event(hdev, event_type, true,
8670 "ARC FARM ARC %u err cause: %s",
8671 arc_farm, gaudi2_arc_sei_error_cause[i]);
8672 sts_clr_val |= BIT(i);
8673 error_count++;
8674 }
8675 }
8676 WREG32(mmARC_FARM_ARC0_AUX_ARC_SEI_INTR_CLR + (arc_farm * ARC_FARM_OFFSET),
8677 sts_clr_val);
8678 }
8679
8680 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ARC_FARM, 0, 0, event_mask);
8681 hl_check_for_glbl_errors(hdev);
8682
8683 return error_count;
8684 }
8685
8686 static int gaudi2_handle_cpu_sei_err(struct hl_device *hdev, u16 event_type)
8687 {
8688 u32 i, sts_val, sts_clr_val = 0, error_count = 0;
8689
8690 sts_val = RREG32(mmCPU_IF_CPU_SEI_INTR_STS);
8691
8692 for (i = 0 ; i < GAUDI2_NUM_OF_CPU_SEI_ERR_CAUSE ; i++) {
8693 if (sts_val & BIT(i)) {
8694 gaudi2_print_event(hdev, event_type, true,
8695 "err cause: %s", gaudi2_cpu_sei_error_cause[i]);
8696 sts_clr_val |= BIT(i);
8697 error_count++;
8698 }
8699 }
8700
8701 hl_check_for_glbl_errors(hdev);
8702
8703 WREG32(mmCPU_IF_CPU_SEI_INTR_CLR, sts_clr_val);
8704
8705 return error_count;
8706 }
8707
8708 static int gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index, u16 event_type,
8709 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8710 u64 *event_mask)
8711 {
8712 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8713 u32 error_count = 0;
8714 int i;
8715
8716 for (i = 0 ; i < GAUDI2_NUM_OF_ROT_ERR_CAUSE ; i++)
8717 if (intr_cause_data & BIT(i)) {
8718 gaudi2_print_event(hdev, event_type, true,
8719 "err cause: %s", guadi2_rot_error_cause[i]);
8720 error_count++;
8721 }
8722
8723 /* check if RAZWI happened */
8724 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0, event_mask);
8725 hl_check_for_glbl_errors(hdev);
8726
8727 return error_count;
8728 }
8729
8730 static int gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, u16 event_type,
8731 struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
8732 u64 *event_mask)
8733 {
8734 u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
8735 u32 error_count = 0;
8736 int i;
8737
8738 for (i = 0 ; i < GAUDI2_NUM_OF_TPC_INTR_CAUSE ; i++)
8739 if (intr_cause_data & BIT(i)) {
8740 gaudi2_print_event(hdev, event_type, true,
8741 "interrupt cause: %s", gaudi2_tpc_interrupts_cause[i]);
8742 error_count++;
8743 }
8744
8745 /* check if RAZWI happened */
8746 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0, event_mask);
8747 hl_check_for_glbl_errors(hdev);
8748
8749 return error_count;
8750 }
8751
8752 static int gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, u16 event_type,
8753 u64 *event_mask)
8754 {
8755 u32 sts_addr, sts_val, sts_clr_val = 0, error_count = 0;
8756 int i;
8757
8758 if (dec_index < NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES)
8759 /* DCORE DEC */
8760 sts_addr = mmDCORE0_VDEC0_BRDG_CTRL_CAUSE_INTR +
8761 DCORE_OFFSET * (dec_index / NUM_OF_DEC_PER_DCORE) +
8762 DCORE_VDEC_OFFSET * (dec_index % NUM_OF_DEC_PER_DCORE);
8763 else
8764 /* PCIE DEC */
8765 sts_addr = mmPCIE_VDEC0_BRDG_CTRL_CAUSE_INTR + PCIE_VDEC_OFFSET *
8766 (dec_index - NUM_OF_VDEC_PER_DCORE * NUM_OF_DCORES);
8767
8768 sts_val = RREG32(sts_addr);
8769
8770 for (i = 0 ; i < GAUDI2_NUM_OF_DEC_ERR_CAUSE ; i++) {
8771 if (sts_val & BIT(i)) {
8772 gaudi2_print_event(hdev, event_type, true,
8773 "err cause: %s", gaudi2_dec_error_cause[i]);
8774 sts_clr_val |= BIT(i);
8775 error_count++;
8776 }
8777 }
8778
8779 /* check if RAZWI happened */
8780 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, event_mask);
8781 hl_check_for_glbl_errors(hdev);
8782
8783 	/* Write 1 to clear errors */
8784 WREG32(sts_addr, sts_clr_val);
8785
8786 return error_count;
8787 }
8788
8789 static int gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8790 u64 *event_mask)
8791 {
8792 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8793 int i;
8794
8795 sts_addr = mmDCORE0_MME_CTRL_LO_INTR_CAUSE + DCORE_OFFSET * mme_index;
8796 sts_clr_addr = mmDCORE0_MME_CTRL_LO_INTR_CLEAR + DCORE_OFFSET * mme_index;
8797
8798 sts_val = RREG32(sts_addr);
8799
8800 for (i = 0 ; i < GAUDI2_NUM_OF_MME_ERR_CAUSE ; i++) {
8801 if (sts_val & BIT(i)) {
8802 gaudi2_print_event(hdev, event_type, true,
8803 "err cause: %s", guadi2_mme_error_cause[i]);
8804 sts_clr_val |= BIT(i);
8805 error_count++;
8806 }
8807 }
8808
8809 /* check if RAZWI happened */
8810 for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
8811 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, event_mask);
8812
8813 hl_check_for_glbl_errors(hdev);
8814
8815 WREG32(sts_clr_addr, sts_clr_val);
8816
8817 return error_count;
8818 }
8819
8820 static int gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u16 event_type)
8821 {
8822 /*
8823 	 * We have a single error cause here, but the reporting mechanism is
8824 	 * buggy, so there is no good reason to fetch the cause; just check
8825 	 * for glbl_errors and exit.
8826 */
8827 hl_check_for_glbl_errors(hdev);
8828
8829 return GAUDI2_NA_EVENT_CAUSE;
8830 }
8831
8832 static int gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index, u16 event_type,
8833 u64 *event_mask)
8834 {
8835 u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0, error_count = 0;
8836 int i;
8837
8838 sts_addr = mmDCORE0_MME_ACC_INTR_CAUSE + DCORE_OFFSET * mme_index;
8839 sts_clr_addr = mmDCORE0_MME_ACC_INTR_CLEAR + DCORE_OFFSET * mme_index;
8840
8841 sts_val = RREG32(sts_addr);
8842
8843 for (i = 0 ; i < GAUDI2_NUM_OF_MME_WAP_ERR_CAUSE ; i++) {
8844 if (sts_val & BIT(i)) {
8845 gaudi2_print_event(hdev, event_type, true,
8846 "err cause: %s", guadi2_mme_wap_error_cause[i]);
8847 sts_clr_val |= BIT(i);
8848 error_count++;
8849 }
8850 }
8851
8852 /* check if RAZWI happened on WAP0/1 */
8853 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, event_mask);
8854 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, event_mask);
8855 hl_check_for_glbl_errors(hdev);
8856
8857 WREG32(sts_clr_addr, sts_clr_val);
8858
8859 return error_count;
8860 }
8861
8862 static int gaudi2_handle_kdma_core_event(struct hl_device *hdev, u16 event_type,
8863 u64 intr_cause_data)
8864 {
8865 u32 error_count = 0;
8866 int i;
8867
8868 	/* If an AXI read or write error is received, an error is reported and an
8869 	 * interrupt message is sent. Due to an HW erratum, when reading the cause
8870 	 * register of the KDMA engine, the reported error is always HBW, even if
8871 	 * the actual error was caused by an LBW KDMA transaction.
8872 	 */
8873 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8874 if (intr_cause_data & BIT(i)) {
8875 gaudi2_print_event(hdev, event_type, true,
8876 "err cause: %s", gaudi2_kdma_core_interrupts_cause[i]);
8877 error_count++;
8878 }
8879
8880 hl_check_for_glbl_errors(hdev);
8881
8882 return error_count;
8883 }
8884
8885 static int gaudi2_handle_dma_core_event(struct hl_device *hdev, u16 event_type, u64 intr_cause)
8886 {
8887 u32 error_count = 0;
8888 int i;
8889
8890 for (i = 0 ; i < GAUDI2_NUM_OF_DMA_CORE_INTR_CAUSE ; i++)
8891 if (intr_cause & BIT(i)) {
8892 gaudi2_print_event(hdev, event_type, true,
8893 "err cause: %s", gaudi2_dma_core_interrupts_cause[i]);
8894 error_count++;
8895 }
8896
8897 hl_check_for_glbl_errors(hdev);
8898
8899 return error_count;
8900 }
8901
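/*
 * Check the four PCIE shared RR "RAZWI happened" indications (HBW/LBW x AW/AR),
 * print the captured info for each one that fired and clear the indication
 * with a write of 1.
 */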
8902 static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
8903 {
8904 u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
8905
8906 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
8907 if (RREG32(razwi_happened_addr)) {
8908 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8909 GAUDI2_ENGINE_ID_PCIE, event_mask);
8910 WREG32(razwi_happened_addr, 0x1);
8911 }
8912
8913 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
8914 if (RREG32(razwi_happened_addr)) {
8915 gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8916 GAUDI2_ENGINE_ID_PCIE, event_mask);
8917 WREG32(razwi_happened_addr, 0x1);
8918 }
8919
8920 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
8921 if (RREG32(razwi_happened_addr)) {
8922 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE",
8923 GAUDI2_ENGINE_ID_PCIE, event_mask);
8924 WREG32(razwi_happened_addr, 0x1);
8925 }
8926
8927 razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
8928 if (RREG32(razwi_happened_addr)) {
8929 gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE",
8930 GAUDI2_ENGINE_ID_PCIE, event_mask);
8931 WREG32(razwi_happened_addr, 0x1);
8932 }
8933 }
8934
8935 static int gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u16 event_type,
8936 u64 intr_cause_data, u64 *event_mask)
8937 {
8938 u32 error_count = 0;
8939 int i;
8940
8941 for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) {
8942 if (!(intr_cause_data & BIT_ULL(i)))
8943 continue;
8944
8945 gaudi2_print_event(hdev, event_type, true,
8946 "err cause: %s", gaudi2_pcie_addr_dec_error_cause[i]);
8947 error_count++;
8948
8949 switch (intr_cause_data & BIT_ULL(i)) {
8950 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
8951 hl_check_for_glbl_errors(hdev);
8952 break;
8953 case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
8954 gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
8955 break;
8956 }
8957 }
8958
8959 return error_count;
8960 }
8961
8962 static int gaudi2_handle_pif_fatal(struct hl_device *hdev, u16 event_type,
8963 u64 intr_cause_data)
8964
8965 {
8966 u32 error_count = 0;
8967 int i;
8968
8969 for (i = 0 ; i < GAUDI2_NUM_OF_PMMU_FATAL_ERR_CAUSE ; i++) {
8970 if (intr_cause_data & BIT_ULL(i)) {
8971 gaudi2_print_event(hdev, event_type, true,
8972 "err cause: %s", gaudi2_pmmu_fatal_interrupts_cause[i]);
8973 error_count++;
8974 }
8975 }
8976
8977 return error_count;
8978 }
8979
8980 static int gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64 intr_cause_data)
8981 {
8982 u32 error_count = 0;
8983 int i;
8984
8985 for (i = 0 ; i < GAUDI2_NUM_OF_HIF_FATAL_ERR_CAUSE ; i++) {
8986 if (intr_cause_data & BIT_ULL(i)) {
8987 gaudi2_print_event(hdev, event_type, true,
8988 "err cause: %s", gaudi2_hif_fatal_interrupts_cause[i]);
8989 error_count++;
8990 }
8991 }
8992
8993 return error_count;
8994 }
8995
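/*
 * If a page-fault capture is valid, rebuild the faulting VA from the two
 * capture registers (bits 63:32 and 31:0), descramble it for HMMU, report
 * the fault and clear the valid indication.
 */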
8996 static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
8997 u64 *event_mask)
8998 {
8999 u32 valid, val;
9000 u64 addr;
9001
9002 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
9003
9004 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_PAGE_ERR_VALID_ENTRY_MASK))
9005 return;
9006
9007 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE));
9008 addr = val & DCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA_63_32_MASK;
9009 addr <<= 32;
9010 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE_VA));
9011
9012 if (is_pmmu) {
9013 dev_err_ratelimited(hdev->dev, "PMMU page fault on va 0x%llx\n", addr);
9014 } else {
9015 addr = gaudi2_mmu_descramble_addr(hdev, addr);
9016 addr &= HW_UNSCRAMBLED_BITS_MASK;
9017 dev_err_ratelimited(hdev->dev, "HMMU page fault on va range 0x%llx - 0x%llx\n",
9018 addr, addr + ~HW_UNSCRAMBLED_BITS_MASK);
9019 }
9020
9021 hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
9022
9023 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
9024 }
9025
9026 static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
9027 {
9028 u32 valid, val;
9029 u64 addr;
9030
9031 valid = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID));
9032
9033 if (!(valid & DCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID_ACCESS_ERR_VALID_ENTRY_MASK))
9034 return;
9035
9036 val = RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE));
9037 addr = val & DCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA_63_32_MASK;
9038 addr <<= 32;
9039 addr |= RREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_ERROR_CAPTURE_VA));
9040
9041 if (!is_pmmu)
9042 addr = gaudi2_mmu_descramble_addr(hdev, addr);
9043
9044 dev_err_ratelimited(hdev->dev, "%s access error on va 0x%llx\n",
9045 is_pmmu ? "PMMU" : "HMMU", addr);
9046 WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_ACCESS_PAGE_ERROR_VALID), 0);
9047 }
9048
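/*
 * Walk the MMU SPI/SEI cause register: cause bit 0 is a page fault and bit 1
 * is an access error, each with its own capture handling. Finally clear the
 * handled causes and their interrupt bits.
 */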
9049 static int gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, u16 event_type,
9050 u64 mmu_base, bool is_pmmu, u64 *event_mask)
9051 {
9052 u32 spi_sei_cause, interrupt_clr = 0x0, error_count = 0;
9053 int i;
9054
9055 spi_sei_cause = RREG32(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET);
9056
9057 for (i = 0 ; i < GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE ; i++) {
9058 if (spi_sei_cause & BIT(i)) {
9059 gaudi2_print_event(hdev, event_type, true,
9060 "err cause: %s", gaudi2_mmu_spi_sei[i].cause);
9061
9062 if (i == 0)
9063 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
9064 else if (i == 1)
9065 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
9066
9067 if (gaudi2_mmu_spi_sei[i].clear_bit >= 0)
9068 interrupt_clr |= BIT(gaudi2_mmu_spi_sei[i].clear_bit);
9069
9070 error_count++;
9071 }
9072 }
9073
9074 /* Clear cause */
9075 WREG32_AND(mmu_base + MMU_SPI_SEI_CAUSE_OFFSET, ~spi_sei_cause);
9076
9077 /* Clear interrupt */
9078 WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
9079
9080 return error_count;
9081 }
9082
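/*
 * Handle a sync manager error: decode the SM SEI cause/log fields and report
 * the first matching cause, then report a CQ interrupt (with its queue index)
 * if one is pending. Both indications are cleared after handling.
 */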
9083 static int gaudi2_handle_sm_err(struct hl_device *hdev, u16 event_type, u8 sm_index)
9084 {
9085 u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log,
9086 cq_intr_addr, cq_intr_val, cq_intr_queue_index, error_count = 0;
9087 int i;
9088
9089 sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
9090 cq_intr_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_INTR + DCORE_OFFSET * sm_index;
9091
9092 sei_cause_val = RREG32(sei_cause_addr);
9093 sei_cause_cause = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_CAUSE_MASK, sei_cause_val);
9094 cq_intr_val = RREG32(cq_intr_addr);
9095
9096 /* SEI interrupt */
9097 if (sei_cause_cause) {
9098 /* There are corresponding SEI_CAUSE_log bits for every SEI_CAUSE_cause bit */
9099 sei_cause_log = FIELD_GET(DCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE_LOG_MASK,
9100 sei_cause_val);
9101
9102 for (i = 0 ; i < GAUDI2_NUM_OF_SM_SEI_ERR_CAUSE ; i++) {
9103 if (!(sei_cause_cause & BIT(i)))
9104 continue;
9105
9106 gaudi2_print_event(hdev, event_type, true,
9107 "err cause: %s. %s: 0x%X",
9108 gaudi2_sm_sei_cause[i].cause_name,
9109 gaudi2_sm_sei_cause[i].log_name,
9110 sei_cause_log);
9111 error_count++;
9112 break;
9113 }
9114
9115 /* Clear SM_SEI_CAUSE */
9116 WREG32(sei_cause_addr, 0);
9117 }
9118
9119 /* CQ interrupt */
9120 if (cq_intr_val & DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_SEC_INTR_MASK) {
9121 cq_intr_queue_index =
9122 FIELD_GET(DCORE0_SYNC_MNGR_GLBL_CQ_INTR_CQ_INTR_QUEUE_INDEX_MASK,
9123 cq_intr_val);
9124
9125 dev_err_ratelimited(hdev->dev, "SM%u err. err cause: CQ_INTR. queue index: %u\n",
9126 sm_index, cq_intr_queue_index);
9127 error_count++;
9128
9129 /* Clear CQ_INTR */
9130 WREG32(cq_intr_addr, 0);
9131 }
9132
9133 hl_check_for_glbl_errors(hdev);
9134
9135 return error_count;
9136 }
9137
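/*
 * Translate an HMMU event ID to the MMU block base address of the HMMU that
 * raised it. The HMMU instances are not laid out in event-ID order, so the
 * (dcore, index-in-dcore) pair is resolved per event and the base is computed
 * as mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET +
 * index_in_dcore * DCORE_HMMU_OFFSET. Unknown events return ULONG_MAX.
 */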
9138 static u64 get_hmmu_base(u16 event_type)
9139 {
9140 u8 dcore, index_in_dcore;
9141
9142 switch (event_type) {
9143 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP:
9144 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU0_SECURITY_ERROR:
9145 dcore = 0;
9146 index_in_dcore = 0;
9147 break;
9148 case GAUDI2_EVENT_HMMU_1_AXI_ERR_RSP:
9149 case GAUDI2_EVENT_HMMU1_SPI_BASE ... GAUDI2_EVENT_HMMU1_SECURITY_ERROR:
9150 dcore = 1;
9151 index_in_dcore = 0;
9152 break;
9153 case GAUDI2_EVENT_HMMU_2_AXI_ERR_RSP:
9154 case GAUDI2_EVENT_HMMU2_SPI_BASE ... GAUDI2_EVENT_HMMU2_SECURITY_ERROR:
9155 dcore = 0;
9156 index_in_dcore = 1;
9157 break;
9158 case GAUDI2_EVENT_HMMU_3_AXI_ERR_RSP:
9159 case GAUDI2_EVENT_HMMU3_SPI_BASE ... GAUDI2_EVENT_HMMU3_SECURITY_ERROR:
9160 dcore = 1;
9161 index_in_dcore = 1;
9162 break;
9163 case GAUDI2_EVENT_HMMU_4_AXI_ERR_RSP:
9164 case GAUDI2_EVENT_HMMU4_SPI_BASE ... GAUDI2_EVENT_HMMU4_SECURITY_ERROR:
9165 dcore = 3;
9166 index_in_dcore = 2;
9167 break;
9168 case GAUDI2_EVENT_HMMU_5_AXI_ERR_RSP:
9169 case GAUDI2_EVENT_HMMU5_SPI_BASE ... GAUDI2_EVENT_HMMU5_SECURITY_ERROR:
9170 dcore = 2;
9171 index_in_dcore = 2;
9172 break;
9173 case GAUDI2_EVENT_HMMU_6_AXI_ERR_RSP:
9174 case GAUDI2_EVENT_HMMU6_SPI_BASE ... GAUDI2_EVENT_HMMU6_SECURITY_ERROR:
9175 dcore = 3;
9176 index_in_dcore = 3;
9177 break;
9178 case GAUDI2_EVENT_HMMU_7_AXI_ERR_RSP:
9179 case GAUDI2_EVENT_HMMU7_SPI_BASE ... GAUDI2_EVENT_HMMU7_SECURITY_ERROR:
9180 dcore = 2;
9181 index_in_dcore = 3;
9182 break;
9183 case GAUDI2_EVENT_HMMU_8_AXI_ERR_RSP:
9184 case GAUDI2_EVENT_HMMU8_SPI_BASE ... GAUDI2_EVENT_HMMU8_SECURITY_ERROR:
9185 dcore = 0;
9186 index_in_dcore = 2;
9187 break;
9188 case GAUDI2_EVENT_HMMU_9_AXI_ERR_RSP:
9189 case GAUDI2_EVENT_HMMU9_SPI_BASE ... GAUDI2_EVENT_HMMU9_SECURITY_ERROR:
9190 dcore = 1;
9191 index_in_dcore = 2;
9192 break;
9193 case GAUDI2_EVENT_HMMU_10_AXI_ERR_RSP:
9194 case GAUDI2_EVENT_HMMU10_SPI_BASE ... GAUDI2_EVENT_HMMU10_SECURITY_ERROR:
9195 dcore = 0;
9196 index_in_dcore = 3;
9197 break;
9198 case GAUDI2_EVENT_HMMU_11_AXI_ERR_RSP:
9199 case GAUDI2_EVENT_HMMU11_SPI_BASE ... GAUDI2_EVENT_HMMU11_SECURITY_ERROR:
9200 dcore = 1;
9201 index_in_dcore = 3;
9202 break;
9203 case GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9204 case GAUDI2_EVENT_HMMU12_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9205 dcore = 3;
9206 index_in_dcore = 0;
9207 break;
9208 case GAUDI2_EVENT_HMMU_13_AXI_ERR_RSP:
9209 case GAUDI2_EVENT_HMMU13_SPI_BASE ... GAUDI2_EVENT_HMMU13_SECURITY_ERROR:
9210 dcore = 2;
9211 index_in_dcore = 0;
9212 break;
9213 case GAUDI2_EVENT_HMMU_14_AXI_ERR_RSP:
9214 case GAUDI2_EVENT_HMMU14_SPI_BASE ... GAUDI2_EVENT_HMMU14_SECURITY_ERROR:
9215 dcore = 3;
9216 index_in_dcore = 1;
9217 break;
9218 case GAUDI2_EVENT_HMMU_15_AXI_ERR_RSP:
9219 case GAUDI2_EVENT_HMMU15_SPI_BASE ... GAUDI2_EVENT_HMMU15_SECURITY_ERROR:
9220 dcore = 2;
9221 index_in_dcore = 1;
9222 break;
9223 default:
9224 return ULONG_MAX;
9225 }
9226
9227 return mmDCORE0_HMMU0_MMU_BASE + dcore * DCORE_OFFSET + index_in_dcore * DCORE_HMMU_OFFSET;
9228 }
9229
9230 static int gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9231 {
9232 bool is_pmmu = false;
9233 u32 error_count = 0;
9234 u64 mmu_base;
9235
9236 switch (event_type) {
9237 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9238 case GAUDI2_EVENT_HMMU0_SPI_BASE ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9239 mmu_base = get_hmmu_base(event_type);
9240 break;
9241
9242 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9243 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9244 is_pmmu = true;
9245 mmu_base = mmPMMU_HBW_MMU_BASE;
9246 break;
9247 default:
9248 return 0;
9249 }
9250
9251 if (mmu_base == ULONG_MAX)
9252 return 0;
9253
9254 error_count = gaudi2_handle_mmu_spi_sei_generic(hdev, event_type, mmu_base,
9255 is_pmmu, event_mask);
9256 hl_check_for_glbl_errors(hdev);
9257
9258 return error_count;
9259 }
9260
9261
9262 /* returns true if hard reset is required (ECC DERR or Read parity), false otherwise (ECC SERR) */
9263 static bool gaudi2_hbm_sei_handle_read_err(struct hl_device *hdev,
9264 struct hl_eq_hbm_sei_read_err_intr_info *rd_err_data, u32 err_cnt)
9265 {
9266 bool require_hard_reset = false;
9267 u32 addr, beat, beat_shift;
9268
9269 dev_err_ratelimited(hdev->dev,
9270 "READ ERROR count: ECC SERR: %d, ECC DERR: %d, RD_PARITY: %d\n",
9271 FIELD_GET(HBM_ECC_SERR_CNTR_MASK, err_cnt),
9272 FIELD_GET(HBM_ECC_DERR_CNTR_MASK, err_cnt),
9273 FIELD_GET(HBM_RD_PARITY_CNTR_MASK, err_cnt));
9274
9275 addr = le32_to_cpu(rd_err_data->dbg_rd_err_addr.rd_addr_val);
9276 dev_err_ratelimited(hdev->dev,
9277 "READ ERROR address: sid(%u), bg(%u), ba(%u), col(%u), row(%u)\n",
9278 FIELD_GET(HBM_RD_ADDR_SID_MASK, addr),
9279 FIELD_GET(HBM_RD_ADDR_BG_MASK, addr),
9280 FIELD_GET(HBM_RD_ADDR_BA_MASK, addr),
9281 FIELD_GET(HBM_RD_ADDR_COL_MASK, addr),
9282 FIELD_GET(HBM_RD_ADDR_ROW_MASK, addr));
9283
9284 /* For each beat (RDQS edge), look for possible errors and print relevant info */
9285 for (beat = 0 ; beat < 4 ; beat++) {
9286 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9287 (HBM_RD_ERR_SERR_BEAT0_MASK << beat))
9288 dev_err_ratelimited(hdev->dev, "Beat%d ECC SERR: DM: %#x, Syndrome: %#x\n",
9289 beat,
9290 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9291 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9292
9293 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9294 (HBM_RD_ERR_DERR_BEAT0_MASK << beat)) {
9295 dev_err_ratelimited(hdev->dev, "Beat%d ECC DERR: DM: %#x, Syndrome: %#x\n",
9296 beat,
9297 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9298 le32_to_cpu(rd_err_data->dbg_rd_err_syndrome));
9299 require_hard_reset = true;
9300 }
9301
9302 beat_shift = beat * HBM_RD_ERR_BEAT_SHIFT;
9303 if (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9304 (HBM_RD_ERR_PAR_ERR_BEAT0_MASK << beat_shift)) {
9305 dev_err_ratelimited(hdev->dev,
9306 "Beat%d read PARITY: DM: %#x, PAR data: %#x\n",
9307 beat,
9308 le32_to_cpu(rd_err_data->dbg_rd_err_dm),
9309 (le32_to_cpu(rd_err_data->dbg_rd_err_misc) &
9310 (HBM_RD_ERR_PAR_DATA_BEAT0_MASK << beat_shift)) >>
9311 (HBM_RD_ERR_PAR_DATA_BEAT0_SHIFT + beat_shift));
9312 require_hard_reset = true;
9313 }
9314
9315 dev_err_ratelimited(hdev->dev, "Beat%d DQ data:\n", beat);
9316 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9317 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2]));
9318 dev_err_ratelimited(hdev->dev, "\t0x%08x\n",
9319 le32_to_cpu(rd_err_data->dbg_rd_err_data[beat * 2 + 1]));
9320 }
9321
9322 return require_hard_reset;
9323 }
9324
9325 static void gaudi2_hbm_sei_print_wr_par_info(struct hl_device *hdev,
9326 struct hl_eq_hbm_sei_wr_par_intr_info *wr_par_err_data, u32 err_cnt)
9327 {
9328 struct hbm_sei_wr_cmd_address *wr_cmd_addr = wr_par_err_data->dbg_last_wr_cmds;
9329 u32 i, curr_addr, derr = wr_par_err_data->dbg_derr;
9330
9331 dev_err_ratelimited(hdev->dev, "WRITE PARITY ERROR count: %d\n", err_cnt);
9332
9333 dev_err_ratelimited(hdev->dev, "CK-0 DERR: 0x%02x, CK-1 DERR: 0x%02x\n",
9334 derr & 0x3, derr & 0xc);
9335
9336 /* JIRA H6-3286 - the following prints may not be valid */
9337 dev_err_ratelimited(hdev->dev, "Last latched write commands addresses:\n");
9338 for (i = 0 ; i < HBM_WR_PAR_CMD_LIFO_LEN ; i++) {
9339 curr_addr = le32_to_cpu(wr_cmd_addr[i].dbg_wr_cmd_addr);
9340 dev_err_ratelimited(hdev->dev,
9341 "\twrite cmd[%u]: Address: SID(%u) BG(%u) BA(%u) COL(%u).\n",
9342 i,
9343 FIELD_GET(WR_PAR_LAST_CMD_SID_MASK, curr_addr),
9344 FIELD_GET(WR_PAR_LAST_CMD_BG_MASK, curr_addr),
9345 FIELD_GET(WR_PAR_LAST_CMD_BA_MASK, curr_addr),
9346 FIELD_GET(WR_PAR_LAST_CMD_COL_MASK, curr_addr));
9347 }
9348 }
9349
9350 static void gaudi2_hbm_sei_print_ca_par_info(struct hl_device *hdev,
9351 struct hl_eq_hbm_sei_ca_par_intr_info *ca_par_err_data, u32 err_cnt)
9352 {
9353 __le32 *col_cmd = ca_par_err_data->dbg_col;
9354 __le16 *row_cmd = ca_par_err_data->dbg_row;
9355 u32 i;
9356
9357 dev_err_ratelimited(hdev->dev, "CA ERROR count: %d\n", err_cnt);
9358
9359 dev_err_ratelimited(hdev->dev, "Last latched C&R bus commands:\n");
9360 for (i = 0 ; i < HBM_CA_ERR_CMD_LIFO_LEN ; i++)
9361 dev_err_ratelimited(hdev->dev, "cmd%u: ROW(0x%04x) COL(0x%05x)\n", i,
9362 le16_to_cpu(row_cmd[i]) & (u16)GENMASK(13, 0),
9363 le32_to_cpu(col_cmd[i]) & (u32)GENMASK(17, 0));
9364 }
9365
9366 /* Returns true if hard reset is needed or false otherwise */
9367 static bool gaudi2_handle_hbm_mc_sei_err(struct hl_device *hdev, u16 event_type,
9368 struct hl_eq_hbm_sei_data *sei_data)
9369 {
9370 bool require_hard_reset = false;
9371 u32 hbm_id, mc_id, cause_idx;
9372
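	/*
	 * The SEI events appear to be grouped four per HBM (two MCs, each with a
	 * severe and a non-severe event), so the HBM index is the event offset
	 * divided by 4 and the MC index is (offset / 2) % 2.
	 */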
9373 hbm_id = (event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 4;
9374 mc_id = ((event_type - GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE) / 2) % 2;
9375
9376 cause_idx = sei_data->hdr.sei_cause;
9377 if (cause_idx > GAUDI2_NUM_OF_HBM_SEI_CAUSE - 1) {
9378 		gaudi2_print_event(hdev, event_type, true,
9379 			"err cause: Invalid HBM SEI event cause (%d) provided by FW",
9380 			cause_idx);
9381 return true;
9382 }
9383
9384 gaudi2_print_event(hdev, event_type, !sei_data->hdr.is_critical,
9385 "System %s Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s",
9386 sei_data->hdr.is_critical ? "Critical" : "Non-critical",
9387 hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel,
9388 hbm_mc_sei_cause[cause_idx]);
9389
9390 /* Print error-specific info */
9391 switch (cause_idx) {
9392 case HBM_SEI_CATTRIP:
9393 require_hard_reset = true;
9394 break;
9395
9396 case HBM_SEI_CMD_PARITY_EVEN:
9397 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_even_info,
9398 le32_to_cpu(sei_data->hdr.cnt));
9399 require_hard_reset = true;
9400 break;
9401
9402 case HBM_SEI_CMD_PARITY_ODD:
9403 gaudi2_hbm_sei_print_ca_par_info(hdev, &sei_data->ca_parity_odd_info,
9404 le32_to_cpu(sei_data->hdr.cnt));
9405 require_hard_reset = true;
9406 break;
9407
9408 case HBM_SEI_WRITE_DATA_PARITY_ERR:
9409 gaudi2_hbm_sei_print_wr_par_info(hdev, &sei_data->wr_parity_info,
9410 le32_to_cpu(sei_data->hdr.cnt));
9411 require_hard_reset = true;
9412 break;
9413
9414 case HBM_SEI_READ_ERR:
9415 /* Unlike other SEI events, read error requires further processing of the
9416 * raw data in order to determine the root cause.
9417 */
9418 require_hard_reset = gaudi2_hbm_sei_handle_read_err(hdev,
9419 &sei_data->read_err_info,
9420 le32_to_cpu(sei_data->hdr.cnt));
9421 break;
9422
9423 default:
9424 break;
9425 }
9426
9427 require_hard_reset |= !!sei_data->hdr.is_critical;
9428
9429 return require_hard_reset;
9430 }
9431
9432 static int gaudi2_handle_hbm_cattrip(struct hl_device *hdev, u16 event_type,
9433 u64 intr_cause_data)
9434 {
9435 if (intr_cause_data) {
9436 gaudi2_print_event(hdev, event_type, true,
9437 "temperature error cause: %#llx", intr_cause_data);
9438 return 1;
9439 }
9440
9441 return 0;
9442 }
9443
9444 static int gaudi2_handle_hbm_mc_spi(struct hl_device *hdev, u64 intr_cause_data)
9445 {
9446 u32 i, error_count = 0;
9447
9448 for (i = 0 ; i < GAUDI2_NUM_OF_HBM_MC_SPI_CAUSE ; i++)
9449 if (intr_cause_data & hbm_mc_spi[i].mask) {
9450 dev_dbg(hdev->dev, "HBM spi event: notification cause(%s)\n",
9451 hbm_mc_spi[i].cause);
9452 error_count++;
9453 }
9454
9455 return error_count;
9456 }
9457
9458 static void gaudi2_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask)
9459 {
9460 ktime_t zero_time = ktime_set(0, 0);
9461
9462 mutex_lock(&hdev->clk_throttling.lock);
9463
9464 switch (event_type) {
9465 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
9466 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER;
9467 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER;
9468 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get();
9469 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time;
9470 dev_dbg_ratelimited(hdev->dev, "Clock throttling due to power consumption\n");
9471 break;
9472
9473 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
9474 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER;
9475 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get();
9476 		dev_dbg_ratelimited(hdev->dev, "Power envelope is safe, back to optimal clock\n");
9477 break;
9478
9479 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
9480 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL;
9481 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL;
9482 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get();
9483 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time;
9484 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9485 dev_info_ratelimited(hdev->dev, "Clock throttling due to overheating\n");
9486 break;
9487
9488 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
9489 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL;
9490 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get();
9491 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9492 		dev_info_ratelimited(hdev->dev, "Thermal envelope is safe, back to optimal clock\n");
9493 break;
9494
9495 default:
9496 dev_err(hdev->dev, "Received invalid clock change event %d\n", event_type);
9497 break;
9498 }
9499
9500 mutex_unlock(&hdev->clk_throttling.lock);
9501 }
9502
9503 static void gaudi2_print_out_of_sync_info(struct hl_device *hdev, u16 event_type,
9504 struct cpucp_pkt_sync_err *sync_err)
9505 {
9506 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9507
9508 gaudi2_print_event(hdev, event_type, false,
9509 "FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9510 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci),
9511 q->pi, atomic_read(&q->ci));
9512 }
9513
9514 static int gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev, u16 event_type)
9515 {
9516 u32 p2p_intr, msix_gw_intr, error_count = 0;
9517
9518 p2p_intr = RREG32(mmPCIE_WRAP_P2P_INTR);
9519 msix_gw_intr = RREG32(mmPCIE_WRAP_MSIX_GW_INTR);
9520
9521 if (p2p_intr) {
9522 gaudi2_print_event(hdev, event_type, true,
9523 "pcie p2p transaction terminated due to security, req_id(0x%x)",
9524 RREG32(mmPCIE_WRAP_P2P_REQ_ID));
9525
9526 WREG32(mmPCIE_WRAP_P2P_INTR, 0x1);
9527 error_count++;
9528 }
9529
9530 if (msix_gw_intr) {
9531 gaudi2_print_event(hdev, event_type, true,
9532 "pcie msi-x gen denied due to vector num check failure, vec(0x%X)",
9533 RREG32(mmPCIE_WRAP_MSIX_GW_VEC));
9534
9535 WREG32(mmPCIE_WRAP_MSIX_GW_INTR, 0x1);
9536 error_count++;
9537 }
9538
9539 return error_count;
9540 }
9541
9542 static int gaudi2_handle_pcie_drain(struct hl_device *hdev,
9543 struct hl_eq_pcie_drain_ind_data *drain_data)
9544 {
9545 u64 cause, error_count = 0;
9546
9547 cause = le64_to_cpu(drain_data->intr_cause.intr_cause_data);
9548
9549 if (cause & BIT_ULL(0)) {
9550 dev_err_ratelimited(hdev->dev, "PCIE AXI drain LBW completed\n");
9551 error_count++;
9552 }
9553
9554 if (cause & BIT_ULL(1)) {
9555 dev_err_ratelimited(hdev->dev, "PCIE AXI drain HBW completed\n");
9556 error_count++;
9557 }
9558
9559 return error_count;
9560 }
9561
9562 static int gaudi2_handle_psoc_drain(struct hl_device *hdev, u64 intr_cause_data)
9563 {
9564 u32 error_count = 0;
9565 int i;
9566
9567 for (i = 0 ; i < GAUDI2_NUM_OF_AXI_DRAIN_ERR_CAUSE ; i++) {
9568 if (intr_cause_data & BIT_ULL(i)) {
9569 dev_err_ratelimited(hdev->dev, "PSOC %s completed\n",
9570 gaudi2_psoc_axi_drain_interrupts_cause[i]);
9571 error_count++;
9572 }
9573 }
9574
9575 hl_check_for_glbl_errors(hdev);
9576
9577 return error_count;
9578 }
9579
9580 static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev, u16 event_type,
9581 struct cpucp_pkt_sync_err *sync_err)
9582 {
9583 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
9584
9585 gaudi2_print_event(hdev, event_type, false,
9586 "FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d",
9587 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
9588 }
9589
9590 static int hl_arc_event_handle(struct hl_device *hdev, u16 event_type,
9591 struct hl_eq_engine_arc_intr_data *data)
9592 {
9593 struct hl_engine_arc_dccm_queue_full_irq *q;
9594 u32 intr_type, engine_id;
9595 u64 payload;
9596
9597 intr_type = le32_to_cpu(data->intr_type);
9598 engine_id = le32_to_cpu(data->engine_id);
9599 payload = le64_to_cpu(data->payload);
9600
9601 switch (intr_type) {
9602 case ENGINE_ARC_DCCM_QUEUE_FULL_IRQ:
9603 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload;
9604
9605 gaudi2_print_event(hdev, event_type, true,
9606 "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u",
9607 engine_id, intr_type, q->queue_index);
9608 return 1;
9609 default:
9610 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type");
9611 return 0;
9612 }
9613 }
9614
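/*
 * Convert an async event ID to the engine ID it originated from. Events that
 * map directly to an engine return immediately; TPC/MME/DEC events first
 * resolve a block-local index and are translated in the second switch.
 */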
9615 static u16 event_id_to_engine_id(struct hl_device *hdev, u16 event_type)
9616 {
9617 enum gaudi2_block_types type = GAUDI2_BLOCK_TYPE_MAX;
9618 u16 index;
9619
9620 switch (event_type) {
9621 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9622 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9623 type = GAUDI2_BLOCK_TYPE_TPC;
9624 break;
9625 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_TPC24_QM:
9626 index = event_type - GAUDI2_EVENT_TPC0_QM;
9627 type = GAUDI2_BLOCK_TYPE_TPC;
9628 break;
9629 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9630 case GAUDI2_EVENT_MME0_SPI_BASE ... GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9631 case GAUDI2_EVENT_MME0_QM:
9632 index = 0;
9633 type = GAUDI2_BLOCK_TYPE_MME;
9634 break;
9635 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9636 case GAUDI2_EVENT_MME1_SPI_BASE ... GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9637 case GAUDI2_EVENT_MME1_QM:
9638 index = 1;
9639 type = GAUDI2_BLOCK_TYPE_MME;
9640 break;
9641 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9642 case GAUDI2_EVENT_MME2_SPI_BASE ... GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9643 case GAUDI2_EVENT_MME2_QM:
9644 index = 2;
9645 type = GAUDI2_BLOCK_TYPE_MME;
9646 break;
9647 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9648 case GAUDI2_EVENT_MME3_SPI_BASE ... GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9649 case GAUDI2_EVENT_MME3_QM:
9650 index = 3;
9651 type = GAUDI2_BLOCK_TYPE_MME;
9652 break;
9653 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9654 case GAUDI2_EVENT_KDMA_BM_SPMU:
9655 case GAUDI2_EVENT_KDMA0_CORE:
9656 return GAUDI2_ENGINE_ID_KDMA;
9657 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9658 case GAUDI2_EVENT_PDMA0_CORE:
9659 case GAUDI2_EVENT_PDMA0_BM_SPMU:
9660 case GAUDI2_EVENT_PDMA0_QM:
9661 return GAUDI2_ENGINE_ID_PDMA_0;
9662 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9663 case GAUDI2_EVENT_PDMA1_CORE:
9664 case GAUDI2_EVENT_PDMA1_BM_SPMU:
9665 case GAUDI2_EVENT_PDMA1_QM:
9666 return GAUDI2_ENGINE_ID_PDMA_1;
9667 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9668 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9669 type = GAUDI2_BLOCK_TYPE_DEC;
9670 break;
9671 case GAUDI2_EVENT_DEC0_SPI ... GAUDI2_EVENT_DEC9_BMON_SPMU:
9672 index = (event_type - GAUDI2_EVENT_DEC0_SPI) >> 1;
9673 type = GAUDI2_BLOCK_TYPE_DEC;
9674 break;
9675 case GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_NIC11_AXI_ERROR_RESPONSE:
9676 index = event_type - GAUDI2_EVENT_NIC0_AXI_ERROR_RESPONSE;
9677 return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
9678 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9679 index = event_type - GAUDI2_EVENT_NIC0_QM0;
9680 return GAUDI2_ENGINE_ID_NIC0_0 + index;
9681 case GAUDI2_EVENT_NIC0_BMON_SPMU ... GAUDI2_EVENT_NIC11_SW_ERROR:
9682 index = event_type - GAUDI2_EVENT_NIC0_BMON_SPMU;
9683 return GAUDI2_ENGINE_ID_NIC0_0 + (index * 2);
9684 case GAUDI2_EVENT_TPC0_BMON_SPMU ... GAUDI2_EVENT_TPC24_KERNEL_ERR:
9685 index = (event_type - GAUDI2_EVENT_TPC0_BMON_SPMU) >> 1;
9686 type = GAUDI2_BLOCK_TYPE_TPC;
9687 break;
9688 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9689 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU:
9690 case GAUDI2_EVENT_ROTATOR0_ROT0_QM:
9691 return GAUDI2_ENGINE_ID_ROT_0;
9692 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9693 case GAUDI2_EVENT_ROTATOR1_BMON_SPMU:
9694 case GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9695 return GAUDI2_ENGINE_ID_ROT_1;
9696 case GAUDI2_EVENT_HDMA0_BM_SPMU:
9697 case GAUDI2_EVENT_HDMA0_QM:
9698 case GAUDI2_EVENT_HDMA0_CORE:
9699 return GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
9700 case GAUDI2_EVENT_HDMA1_BM_SPMU:
9701 case GAUDI2_EVENT_HDMA1_QM:
9702 case GAUDI2_EVENT_HDMA1_CORE:
9703 return GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
9704 case GAUDI2_EVENT_HDMA2_BM_SPMU:
9705 case GAUDI2_EVENT_HDMA2_QM:
9706 case GAUDI2_EVENT_HDMA2_CORE:
9707 return GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
9708 case GAUDI2_EVENT_HDMA3_BM_SPMU:
9709 case GAUDI2_EVENT_HDMA3_QM:
9710 case GAUDI2_EVENT_HDMA3_CORE:
9711 return GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
9712 case GAUDI2_EVENT_HDMA4_BM_SPMU:
9713 case GAUDI2_EVENT_HDMA4_QM:
9714 case GAUDI2_EVENT_HDMA4_CORE:
9715 return GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
9716 case GAUDI2_EVENT_HDMA5_BM_SPMU:
9717 case GAUDI2_EVENT_HDMA5_QM:
9718 case GAUDI2_EVENT_HDMA5_CORE:
9719 return GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
9720 case GAUDI2_EVENT_HDMA6_BM_SPMU:
9721 case GAUDI2_EVENT_HDMA6_QM:
9722 case GAUDI2_EVENT_HDMA6_CORE:
9723 return GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
9724 case GAUDI2_EVENT_HDMA7_BM_SPMU:
9725 case GAUDI2_EVENT_HDMA7_QM:
9726 case GAUDI2_EVENT_HDMA7_CORE:
9727 return GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
9728 default:
9729 break;
9730 }
9731
9732 switch (type) {
9733 case GAUDI2_BLOCK_TYPE_TPC:
9734 switch (index) {
9735 case TPC_ID_DCORE0_TPC0 ... TPC_ID_DCORE0_TPC5:
9736 return GAUDI2_DCORE0_ENGINE_ID_TPC_0 + index;
9737 case TPC_ID_DCORE1_TPC0 ... TPC_ID_DCORE1_TPC5:
9738 return GAUDI2_DCORE1_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE1_TPC0;
9739 case TPC_ID_DCORE2_TPC0 ... TPC_ID_DCORE2_TPC5:
9740 return GAUDI2_DCORE2_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE2_TPC0;
9741 case TPC_ID_DCORE3_TPC0 ... TPC_ID_DCORE3_TPC5:
9742 return GAUDI2_DCORE3_ENGINE_ID_TPC_0 + index - TPC_ID_DCORE3_TPC0;
9743 default:
9744 break;
9745 }
9746 break;
9747 case GAUDI2_BLOCK_TYPE_MME:
9748 switch (index) {
9749 case MME_ID_DCORE0: return GAUDI2_DCORE0_ENGINE_ID_MME;
9750 case MME_ID_DCORE1: return GAUDI2_DCORE1_ENGINE_ID_MME;
9751 case MME_ID_DCORE2: return GAUDI2_DCORE2_ENGINE_ID_MME;
9752 case MME_ID_DCORE3: return GAUDI2_DCORE3_ENGINE_ID_MME;
9753 default:
9754 break;
9755 }
9756 break;
9757 case GAUDI2_BLOCK_TYPE_DEC:
9758 switch (index) {
9759 case DEC_ID_DCORE0_DEC0: return GAUDI2_DCORE0_ENGINE_ID_DEC_0;
9760 case DEC_ID_DCORE0_DEC1: return GAUDI2_DCORE0_ENGINE_ID_DEC_1;
9761 case DEC_ID_DCORE1_DEC0: return GAUDI2_DCORE1_ENGINE_ID_DEC_0;
9762 case DEC_ID_DCORE1_DEC1: return GAUDI2_DCORE1_ENGINE_ID_DEC_1;
9763 case DEC_ID_DCORE2_DEC0: return GAUDI2_DCORE2_ENGINE_ID_DEC_0;
9764 case DEC_ID_DCORE2_DEC1: return GAUDI2_DCORE2_ENGINE_ID_DEC_1;
9765 case DEC_ID_DCORE3_DEC0: return GAUDI2_DCORE3_ENGINE_ID_DEC_0;
9766 case DEC_ID_DCORE3_DEC1: return GAUDI2_DCORE3_ENGINE_ID_DEC_1;
9767 case DEC_ID_PCIE_VDEC0: return GAUDI2_PCIE_ENGINE_ID_DEC_0;
9768 case DEC_ID_PCIE_VDEC1: return GAUDI2_PCIE_ENGINE_ID_DEC_1;
9769 default:
9770 break;
9771 }
9772 break;
9773 default:
9774 break;
9775 }
9776
9777 return U16_MAX;
9778 }
9779
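/*
 * Top-level event queue handler: dispatch each async event to its handler,
 * accumulating the reported error count, the user notification mask and the
 * reset flags/criticality collected from the individual handlers.
 */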
9780 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
9781 {
9782 struct gaudi2_device *gaudi2 = hdev->asic_specific;
9783 bool reset_required = false, is_critical = false;
9784 u32 index, ctl, reset_flags = 0, error_count = 0;
9785 u64 event_mask = 0;
9786 u16 event_type;
9787
9788 ctl = le32_to_cpu(eq_entry->hdr.ctl);
9789 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT);
9790
9791 if (event_type >= GAUDI2_EVENT_SIZE) {
9792 dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
9793 event_type, GAUDI2_EVENT_SIZE - 1);
9794 return;
9795 }
9796
9797 gaudi2->events_stat[event_type]++;
9798 gaudi2->events_stat_aggregate[event_type]++;
9799
9800 switch (event_type) {
9801 case GAUDI2_EVENT_PCIE_CORE_SERR ... GAUDI2_EVENT_ARC0_ECC_DERR:
9802 fallthrough;
9803 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR:
9804 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9805 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9806 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
9807 is_critical = eq_entry->ecc_data.is_critical;
9808 error_count++;
9809 break;
9810
9811 case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
9812 fallthrough;
9813 case GAUDI2_EVENT_ROTATOR0_ROT0_QM ... GAUDI2_EVENT_ROTATOR1_ROT1_QM:
9814 fallthrough;
9815 case GAUDI2_EVENT_NIC0_QM0 ... GAUDI2_EVENT_NIC11_QM1:
9816 error_count = gaudi2_handle_qman_err(hdev, event_type, &event_mask);
9817 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9818 break;
9819
9820 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0:
9821 error_count = gaudi2_handle_arc_farm_sei_err(hdev, event_type, &event_mask);
9822 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9823 break;
9824
9825 case GAUDI2_EVENT_CPU_AXI_ERR_RSP:
9826 error_count = gaudi2_handle_cpu_sei_err(hdev, event_type);
9827 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9828 event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR;
9829 break;
9830
9831 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
9832 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
9833 error_count = gaudi2_handle_qm_sei_err(hdev, event_type, true, &event_mask);
9834 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9835 break;
9836
9837 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
9838 case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
9839 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
9840 error_count = gaudi2_handle_rot_err(hdev, index, event_type,
9841 &eq_entry->razwi_with_intr_cause, &event_mask);
9842 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9843 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9844 break;
9845
9846 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
9847 index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
9848 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9849 &eq_entry->razwi_with_intr_cause, &event_mask);
9850 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9851 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9852 break;
9853
9854 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
9855 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
9856 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9857 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9858 break;
9859
9860 case GAUDI2_EVENT_TPC0_KERNEL_ERR:
9861 case GAUDI2_EVENT_TPC1_KERNEL_ERR:
9862 case GAUDI2_EVENT_TPC2_KERNEL_ERR:
9863 case GAUDI2_EVENT_TPC3_KERNEL_ERR:
9864 case GAUDI2_EVENT_TPC4_KERNEL_ERR:
9865 case GAUDI2_EVENT_TPC5_KERNEL_ERR:
9866 case GAUDI2_EVENT_TPC6_KERNEL_ERR:
9867 case GAUDI2_EVENT_TPC7_KERNEL_ERR:
9868 case GAUDI2_EVENT_TPC8_KERNEL_ERR:
9869 case GAUDI2_EVENT_TPC9_KERNEL_ERR:
9870 case GAUDI2_EVENT_TPC10_KERNEL_ERR:
9871 case GAUDI2_EVENT_TPC11_KERNEL_ERR:
9872 case GAUDI2_EVENT_TPC12_KERNEL_ERR:
9873 case GAUDI2_EVENT_TPC13_KERNEL_ERR:
9874 case GAUDI2_EVENT_TPC14_KERNEL_ERR:
9875 case GAUDI2_EVENT_TPC15_KERNEL_ERR:
9876 case GAUDI2_EVENT_TPC16_KERNEL_ERR:
9877 case GAUDI2_EVENT_TPC17_KERNEL_ERR:
9878 case GAUDI2_EVENT_TPC18_KERNEL_ERR:
9879 case GAUDI2_EVENT_TPC19_KERNEL_ERR:
9880 case GAUDI2_EVENT_TPC20_KERNEL_ERR:
9881 case GAUDI2_EVENT_TPC21_KERNEL_ERR:
9882 case GAUDI2_EVENT_TPC22_KERNEL_ERR:
9883 case GAUDI2_EVENT_TPC23_KERNEL_ERR:
9884 case GAUDI2_EVENT_TPC24_KERNEL_ERR:
9885 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
9886 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
9887 error_count = gaudi2_tpc_ack_interrupts(hdev, index, event_type,
9888 &eq_entry->razwi_with_intr_cause, &event_mask);
9889 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9890 break;
9891
9892 case GAUDI2_EVENT_DEC0_SPI:
9893 case GAUDI2_EVENT_DEC1_SPI:
9894 case GAUDI2_EVENT_DEC2_SPI:
9895 case GAUDI2_EVENT_DEC3_SPI:
9896 case GAUDI2_EVENT_DEC4_SPI:
9897 case GAUDI2_EVENT_DEC5_SPI:
9898 case GAUDI2_EVENT_DEC6_SPI:
9899 case GAUDI2_EVENT_DEC7_SPI:
9900 case GAUDI2_EVENT_DEC8_SPI:
9901 case GAUDI2_EVENT_DEC9_SPI:
9902 index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
9903 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
9904 error_count = gaudi2_handle_dec_err(hdev, index, event_type, &event_mask);
9905 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9906 break;
9907
9908 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
9909 case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
9910 case GAUDI2_EVENT_MME2_CTRL_AXI_ERROR_RESPONSE:
9911 case GAUDI2_EVENT_MME3_CTRL_AXI_ERROR_RESPONSE:
9912 index = (event_type - GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE) /
9913 (GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
9914 GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
9915 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9916 error_count += gaudi2_handle_qm_sei_err(hdev, event_type, false, &event_mask);
9917 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9918 break;
9919
9920 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR:
9921 case GAUDI2_EVENT_MME1_QMAN_SW_ERROR:
9922 case GAUDI2_EVENT_MME2_QMAN_SW_ERROR:
9923 case GAUDI2_EVENT_MME3_QMAN_SW_ERROR:
9924 index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
9925 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
9926 GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
9927 error_count = gaudi2_handle_mme_err(hdev, index, event_type, &event_mask);
9928 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9929 break;
9930
9931 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID:
9932 case GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID:
9933 case GAUDI2_EVENT_MME2_WAP_SOURCE_RESULT_INVALID:
9934 case GAUDI2_EVENT_MME3_WAP_SOURCE_RESULT_INVALID:
9935 index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
9936 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
9937 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
9938 error_count = gaudi2_handle_mme_wap_err(hdev, index, event_type, &event_mask);
9939 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9940 break;
9941
9942 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP:
9943 case GAUDI2_EVENT_KDMA0_CORE:
9944 error_count = gaudi2_handle_kdma_core_event(hdev, event_type,
9945 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9946 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9947 break;
9948
9949 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_HDMA5_CORE:
9950 error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9951 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9952 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9953 break;
9954
9955 case GAUDI2_EVENT_PDMA0_CORE ... GAUDI2_EVENT_PDMA1_CORE:
9956 error_count = gaudi2_handle_dma_core_event(hdev, event_type,
9957 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9958 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9959 break;
9960
9961 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
9962 error_count = gaudi2_print_pcie_addr_dec_info(hdev, event_type,
9963 le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
9964 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9965 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9966 break;
9967
9968 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR:
9969 case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
9970 case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
9971 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
9972 error_count = gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
9973 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9974 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9975 break;
9976
9977 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL:
9978 error_count = gaudi2_handle_hif_fatal(hdev, event_type,
9979 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9980 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9981 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9982 break;
9983
9984 case GAUDI2_EVENT_PMMU_FATAL_0:
9985 error_count = gaudi2_handle_pif_fatal(hdev, event_type,
9986 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
9987 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
9988 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9989 break;
9990
9991 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
9992 error_count = gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
9993 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
9994 break;
9995
9996 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE:
9997 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
9998 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) {
9999 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10000 reset_required = true;
10001 is_critical = eq_entry->sei_data.hdr.is_critical;
10002 }
10003 error_count++;
10004 break;
10005
10006 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5:
10007 error_count = gaudi2_handle_hbm_cattrip(hdev, event_type,
10008 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10009 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10010 break;
10011
10012 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI:
10013 error_count = gaudi2_handle_hbm_mc_spi(hdev,
10014 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10015 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10016 break;
10017
10018 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE:
10019 error_count = gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data);
10020 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10021 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10022 if (hl_fw_version_cmp(hdev, 1, 13, 0) >= 0)
10023 is_critical = true;
10024 break;
10025
10026 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN:
10027 error_count = gaudi2_handle_psoc_drain(hdev,
10028 le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
10029 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10030 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10031 break;
10032
10033 case GAUDI2_EVENT_CPU_AXI_ECC:
10034 error_count = GAUDI2_NA_EVENT_CAUSE;
10035 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10036 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10037 break;
10038 case GAUDI2_EVENT_CPU_L2_RAM_ECC:
10039 error_count = GAUDI2_NA_EVENT_CAUSE;
10040 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10041 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10042 break;
10043 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP:
10044 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP:
10045 case GAUDI2_EVENT_MME2_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME2_SBTE4_AXI_ERR_RSP:
10046 case GAUDI2_EVENT_MME3_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME3_SBTE4_AXI_ERR_RSP:
10047 error_count = gaudi2_handle_mme_sbte_err(hdev, event_type);
10048 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10049 break;
10050 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B:
10051 error_count = GAUDI2_NA_EVENT_CAUSE;
10052 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10053 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10054 break;
10055 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP:
10056 error_count = GAUDI2_NA_EVENT_CAUSE;
10057 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10058 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10059 break;
10060 case GAUDI2_EVENT_PSOC_PRSTN_FALL:
10061 error_count = GAUDI2_NA_EVENT_CAUSE;
10062 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10063 break;
10064 case GAUDI2_EVENT_PCIE_APB_TIMEOUT:
10065 error_count = GAUDI2_NA_EVENT_CAUSE;
10066 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10067 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10068 break;
10069 case GAUDI2_EVENT_PCIE_FATAL_ERR:
10070 error_count = GAUDI2_NA_EVENT_CAUSE;
10071 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10072 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10073 break;
10074 case GAUDI2_EVENT_TPC0_BMON_SPMU:
10075 case GAUDI2_EVENT_TPC1_BMON_SPMU:
10076 case GAUDI2_EVENT_TPC2_BMON_SPMU:
10077 case GAUDI2_EVENT_TPC3_BMON_SPMU:
10078 case GAUDI2_EVENT_TPC4_BMON_SPMU:
10079 case GAUDI2_EVENT_TPC5_BMON_SPMU:
10080 case GAUDI2_EVENT_TPC6_BMON_SPMU:
10081 case GAUDI2_EVENT_TPC7_BMON_SPMU:
10082 case GAUDI2_EVENT_TPC8_BMON_SPMU:
10083 case GAUDI2_EVENT_TPC9_BMON_SPMU:
10084 case GAUDI2_EVENT_TPC10_BMON_SPMU:
10085 case GAUDI2_EVENT_TPC11_BMON_SPMU:
10086 case GAUDI2_EVENT_TPC12_BMON_SPMU:
10087 case GAUDI2_EVENT_TPC13_BMON_SPMU:
10088 case GAUDI2_EVENT_TPC14_BMON_SPMU:
10089 case GAUDI2_EVENT_TPC15_BMON_SPMU:
10090 case GAUDI2_EVENT_TPC16_BMON_SPMU:
10091 case GAUDI2_EVENT_TPC17_BMON_SPMU:
10092 case GAUDI2_EVENT_TPC18_BMON_SPMU:
10093 case GAUDI2_EVENT_TPC19_BMON_SPMU:
10094 case GAUDI2_EVENT_TPC20_BMON_SPMU:
10095 case GAUDI2_EVENT_TPC21_BMON_SPMU:
10096 case GAUDI2_EVENT_TPC22_BMON_SPMU:
10097 case GAUDI2_EVENT_TPC23_BMON_SPMU:
10098 case GAUDI2_EVENT_TPC24_BMON_SPMU:
10099 case GAUDI2_EVENT_MME0_CTRL_BMON_SPMU:
10100 case GAUDI2_EVENT_MME0_SBTE_BMON_SPMU:
10101 case GAUDI2_EVENT_MME0_WAP_BMON_SPMU:
10102 case GAUDI2_EVENT_MME1_CTRL_BMON_SPMU:
10103 case GAUDI2_EVENT_MME1_SBTE_BMON_SPMU:
10104 case GAUDI2_EVENT_MME1_WAP_BMON_SPMU:
10105 case GAUDI2_EVENT_MME2_CTRL_BMON_SPMU:
10106 case GAUDI2_EVENT_MME2_SBTE_BMON_SPMU:
10107 case GAUDI2_EVENT_MME2_WAP_BMON_SPMU:
10108 case GAUDI2_EVENT_MME3_CTRL_BMON_SPMU:
10109 case GAUDI2_EVENT_MME3_SBTE_BMON_SPMU:
10110 case GAUDI2_EVENT_MME3_WAP_BMON_SPMU:
10111 case GAUDI2_EVENT_HDMA2_BM_SPMU ... GAUDI2_EVENT_PDMA1_BM_SPMU:
10112 fallthrough;
10113 case GAUDI2_EVENT_DEC0_BMON_SPMU:
10114 case GAUDI2_EVENT_DEC1_BMON_SPMU:
10115 case GAUDI2_EVENT_DEC2_BMON_SPMU:
10116 case GAUDI2_EVENT_DEC3_BMON_SPMU:
10117 case GAUDI2_EVENT_DEC4_BMON_SPMU:
10118 case GAUDI2_EVENT_DEC5_BMON_SPMU:
10119 case GAUDI2_EVENT_DEC6_BMON_SPMU:
10120 case GAUDI2_EVENT_DEC7_BMON_SPMU:
10121 case GAUDI2_EVENT_DEC8_BMON_SPMU:
10122 case GAUDI2_EVENT_DEC9_BMON_SPMU:
10123 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... GAUDI2_EVENT_SM3_BMON_SPMU:
10124 error_count = GAUDI2_NA_EVENT_CAUSE;
10125 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10126 break;
10127
10128 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S:
10129 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_E:
10130 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S:
10131 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
10132 gaudi2_print_clk_change_info(hdev, event_type, &event_mask);
10133 error_count = GAUDI2_NA_EVENT_CAUSE;
10134 break;
10135
10136 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC:
10137 gaudi2_print_out_of_sync_info(hdev, event_type, &eq_entry->pkt_sync_err);
10138 error_count = GAUDI2_NA_EVENT_CAUSE;
10139 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10140 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10141 break;
10142
10143 case GAUDI2_EVENT_PCIE_FLR_REQUESTED:
10144 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10145 error_count = GAUDI2_NA_EVENT_CAUSE;
10146 /* Do nothing - FW will handle it */
10147 break;
10148
10149 case GAUDI2_EVENT_PCIE_P2P_MSIX:
10150 error_count = gaudi2_handle_pcie_p2p_msix(hdev, event_type);
10151 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10152 break;
10153
10154 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
10155 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
10156 error_count = gaudi2_handle_sm_err(hdev, event_type, index);
10157 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10158 break;
10159
10160 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR:
10161 error_count = GAUDI2_NA_EVENT_CAUSE;
10162 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10163 break;
10164
10165 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
10166 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n",
10167 le64_to_cpu(eq_entry->data[0]));
10168 error_count = GAUDI2_NA_EVENT_CAUSE;
10169 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10170 break;
10171 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT:
10172 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n",
10173 le64_to_cpu(eq_entry->data[0]));
10174 error_count = GAUDI2_NA_EVENT_CAUSE;
10175 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10176 break;
10177
10178 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED:
10179 gaudi2_print_cpu_pkt_failure_info(hdev, event_type, &eq_entry->pkt_sync_err);
10180 error_count = GAUDI2_NA_EVENT_CAUSE;
10181 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
10182 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10183 break;
10184
10185 case GAUDI2_EVENT_ARC_DCCM_FULL:
10186 error_count = hl_arc_event_handle(hdev, event_type, &eq_entry->arc_data);
10187 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
10188 break;
10189
10190 case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
10191 case GAUDI2_EVENT_CPU_DEV_RESET_REQ:
10192 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
10193 error_count = GAUDI2_NA_EVENT_CAUSE;
10194 is_critical = true;
10195 break;
10196
10197 case GAUDI2_EVENT_ARC_PWR_BRK_ENTRY:
10198 case GAUDI2_EVENT_ARC_PWR_BRK_EXT:
10199 case GAUDI2_EVENT_ARC_PWR_RD_MODE0:
10200 case GAUDI2_EVENT_ARC_PWR_RD_MODE1:
10201 case GAUDI2_EVENT_ARC_PWR_RD_MODE2:
10202 case GAUDI2_EVENT_ARC_PWR_RD_MODE3:
10203 error_count = GAUDI2_NA_EVENT_CAUSE;
10204 dev_info_ratelimited(hdev->dev, "%s event received\n",
10205 gaudi2_irq_map_table[event_type].name);
10206 break;
10207
10208 case GAUDI2_EVENT_ARC_EQ_HEARTBEAT:
10209 hl_eq_heartbeat_event_handle(hdev);
10210 error_count = GAUDI2_NA_EVENT_CAUSE;
10211 break;
10212 default:
10213 if (gaudi2_irq_map_table[event_type].valid) {
10214 dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
10215 event_type);
10216 error_count = GAUDI2_NA_EVENT_CAUSE;
10217 }
10218 }
10219
10220 if (event_mask & HL_NOTIFIER_EVENT_USER_ENGINE_ERR)
10221 hl_capture_engine_err(hdev, event_id_to_engine_id(hdev, event_type), error_count);
10222
10223 /* Make sure to dump an error in case no error cause was printed so far.
10224 * Note that although we have counted the errors, we use this number as
10225 * a boolean.
10226 */
10227 if (error_count == GAUDI2_NA_EVENT_CAUSE && !is_info_event(event_type))
10228 gaudi2_print_event(hdev, event_type, true, "%d", event_type);
10229 else if (error_count == 0)
10230 gaudi2_print_event(hdev, event_type, true,
10231 "No error cause for H/W event %u", event_type);
10232
10233 if ((gaudi2_irq_map_table[event_type].reset != EVENT_RESET_TYPE_NONE) || reset_required) {
10234 if (reset_required ||
10235 (gaudi2_irq_map_table[event_type].reset == EVENT_RESET_TYPE_HARD))
10236 reset_flags |= HL_DRV_RESET_HARD;
10237
10238 if (hdev->hard_reset_on_fw_events ||
10239 (hdev->asic_prop.fw_security_enabled && is_critical))
10240 goto reset_device;
10241 }
10242
10243 /* Send unmask irq only for interrupts not classified as MSG */
10244 if (!gaudi2_irq_map_table[event_type].msg)
10245 hl_fw_unmask_irq(hdev, event_type);
10246
10247 if (event_mask)
10248 hl_notifier_event_send_all(hdev, event_mask);
10249
10250 return;
10251
10252 reset_device:
10253 if (hdev->asic_prop.fw_security_enabled && is_critical) {
10254 reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
10255 event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
10256 } else {
10257 reset_flags |= HL_DRV_RESET_DELAY;
10258 }
10259 /* escalate general hw errors to critical/fatal error */
10260 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR)
10261 hl_handle_critical_hw_err(hdev, event_type, &event_mask);
10262
10263 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
10264 hl_device_cond_reset(hdev, reset_flags, event_mask);
10265 }
10266
10267 static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
10268 struct packet_lin_dma *lin_dma_pkt,
10269 u64 phys_addr, u32 hw_queue_id, u32 size, u64 addr, u32 val)
10270 {
10271 u32 ctl, pkt_size;
10272 int rc = 0, i;
10273
10274 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
10275 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
10276 ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
10277 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
10278
10279 lin_dma_pkt->ctl = cpu_to_le32(ctl);
10280 lin_dma_pkt->src_addr = cpu_to_le64(val);
10281 lin_dma_pkt->dst_addr = cpu_to_le64(addr);
10282 lin_dma_pkt->tsize = cpu_to_le32(size);
10283
10284 pkt_size = sizeof(struct packet_lin_dma);
10285
10286 for (i = 0; i < 3; i++) {
10287 rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
10288 phys_addr + (i * sizeof(u64)),
10289 ((u64 *)(lin_dma_pkt)) + i, DEBUGFS_WRITE64);
10290 if (rc) {
10291 dev_err(hdev->dev, "Failed to copy lin_dma packet to HBM (%#llx)\n",
10292 phys_addr);
10293 return rc;
10294 }
10295 }
10296
10297 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr);
10298 if (rc)
10299 dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n",
10300 hw_queue_id);
10301
10302 return rc;
10303 }
10304
10305 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
10306 {
10307 u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
10308 GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
10309 GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
10310 GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
10311 u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
10312 old_mmubp, mmubp, num_of_pkts, busy, pkt_size, cb_len;
10313 u64 comp_addr, cur_addr = addr, end_addr = addr + size;
10314 struct asic_fixed_properties *prop = &hdev->asic_prop;
10315 int rc = 0, dma_num = 0, i;
10316 void *lin_dma_pkts_arr;
10317
10318 if (prop->edma_enabled_mask == 0) {
10319 dev_info(hdev->dev, "none of the EDMA engines is enabled - skip dram scrubbing\n");
10320 return -EIO;
10321 }
10322
10323 sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10324 sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
10325 comp_addr = CFG_BASE + sob_addr;
10326 comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
10327 FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
10328 mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
10329 FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
10330
10331 /* Calculate how many lin dma pkts we'll need */
10332 num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
10333 pkt_size = sizeof(struct packet_lin_dma);
10334 cb_len = pkt_size * num_of_pkts;
10335
10336 /*
10337 * If we're not scrubbing the HMMU or NIC reserved sections in HBM,
10338 * then this is the scrubbing of the user section. Since we use the start of the
10339 * user section to store the CB for the EDMA QM, shift the start address of the
10340 * scrubbing accordingly and scrub the CB section before leaving this function.
10341 */
10342 if ((addr >= prop->dram_user_base_address) &&
10343 (addr < prop->dram_user_base_address + cb_len))
10344 cur_addr += (prop->dram_user_base_address + cb_len) - addr;
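/*
 * Illustrative example (a sketch, the size is hypothetical): if the requested
 * range starts exactly at dram_user_base_address and cb_len works out to 4KB,
 * cur_addr is advanced past those 4KB so the LIN_DMA packets stored there are
 * not overwritten while the EDMA QMs still consume them; that area is then
 * scrubbed separately before this function returns.
 */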
10345
10346 lin_dma_pkts_arr = kvcalloc(num_of_pkts, pkt_size, GFP_KERNEL);
10347 if (!lin_dma_pkts_arr)
10348 return -ENOMEM;
10349
10350 /*
10351 * Set MMU bypass for the scrubbing - all EDMAs are configured the same, so save
10352 * only the first one to restore later.
10353 * Also set the SOB address on all EDMA cores for completion.
10354 * Set the QM as trusted to allow it to access physical addresses with MMU bypass.
10355 */
10356 old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
10357 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10358 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10359 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10360 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10361
10362 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10363 continue;
10364
10365 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
10366 edma_offset, mmubp);
10367 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
10368 lower_32_bits(comp_addr));
10369 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
10370 upper_32_bits(comp_addr));
10371 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
10372 comp_val);
10373 gaudi2_qman_set_test_mode(hdev,
10374 edma_queues_id[dcore] + 4 * edma_idx, true);
10375 }
10376 }
10377
10378 WREG32(sob_addr, 0);
10379
10380 while (cur_addr < end_addr) {
10381 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10382 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10383 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10384
10385 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10386 continue;
10387
10388 chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
10389
10390 rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
10391 (struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
10392 prop->dram_user_base_address + (dma_num * pkt_size),
10393 edma_queues_id[dcore] + edma_idx * 4,
10394 chunk_size, cur_addr, val);
10395 if (rc)
10396 goto end;
10397
10398 dma_num++;
10399 cur_addr += chunk_size;
10400 if (cur_addr == end_addr)
10401 goto edma_wait;
10402 }
10403 }
10404 }
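/*
 * Worked example (hypothetical sizes, for illustration only): scrubbing 10GB
 * with all EDMA engines enabled issues five 2GB LIN_DMA packets, one to each
 * of the first five engines in the dcore/EDMA iteration order, so dma_num ends
 * up as 5. Every completion increments the SOB by 1 (comp_val above), and the
 * poll below waits until the SOB value equals dma_num.
 */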
10405
10406 edma_wait:
10407 rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
10408 if (rc) {
10409 dev_err(hdev->dev, "DMA timeout during HBM scrubbing (sob: 0x%x, dma_num: 0x%x)\n",
10410 busy, dma_num);
10411 goto end;
10412 }
10413 end:
10414 for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
10415 for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
10416 u32 edma_offset = dcore * DCORE_OFFSET + edma_idx * DCORE_EDMA_OFFSET;
10417 u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
10418
10419 if (!(prop->edma_enabled_mask & BIT(edma_bit)))
10420 continue;
10421
10422 WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
10423 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
10424 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
10425 WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
10426 gaudi2_qman_set_test_mode(hdev,
10427 edma_queues_id[dcore] + 4 * edma_idx, false);
10428 }
10429 }
10430
10431 memset(lin_dma_pkts_arr, 0, sizeof(u64));
10432
10433 /* Zero the HBM area where we copied the CB */
10434 for (i = 0; i < cb_len; i += sizeof(u64))
10435 rc = hdev->asic_funcs->access_dev_mem(hdev, PCI_REGION_DRAM,
10436 prop->dram_user_base_address + i,
10437 (u64 *)(lin_dma_pkts_arr), DEBUGFS_WRITE64);
10438 WREG32(sob_addr, 0);
10439
10440 kfree(lin_dma_pkts_arr);
10441
10442 return rc;
10443 }
10444
10445 static int gaudi2_scrub_device_dram(struct hl_device *hdev, u64 val)
10446 {
10447 int rc;
10448 struct asic_fixed_properties *prop = &hdev->asic_prop;
10449 u64 size = prop->dram_end_address - prop->dram_user_base_address;
10450
10451 rc = gaudi2_memset_device_memory(hdev, prop->dram_user_base_address, size, val);
10452
10453 if (rc)
10454 dev_err(hdev->dev, "Failed to scrub dram, address: 0x%llx size: %llu\n",
10455 prop->dram_user_base_address, size);
10456 return rc;
10457 }
10458
10459 static int gaudi2_scrub_device_mem(struct hl_device *hdev)
10460 {
10461 int rc;
10462 struct asic_fixed_properties *prop = &hdev->asic_prop;
10463 u64 val = hdev->memory_scrub_val;
10464 u64 addr, size;
10465
10466 if (!hdev->memory_scrub)
10467 return 0;
10468
10469 /* scrub SRAM */
10470 addr = prop->sram_user_base_address;
10471 size = hdev->pldm ? 0x10000 : (prop->sram_size - SRAM_USER_BASE_OFFSET);
10472 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx, val: 0x%llx\n",
10473 addr, addr + size, val);
10474 rc = gaudi2_memset_device_memory(hdev, addr, size, val);
10475 if (rc) {
10476 dev_err(hdev->dev, "scrubbing SRAM failed (%d)\n", rc);
10477 return rc;
10478 }
10479
10480 /* scrub DRAM */
10481 rc = gaudi2_scrub_device_dram(hdev, val);
10482 if (rc) {
10483 dev_err(hdev->dev, "scrubbing DRAM failed (%d)\n", rc);
10484 return rc;
10485 }
10486 return 0;
10487 }
10488
10489 static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
10490 {
10491 u64 addr, mon_sts_addr, mon_cfg_addr, cq_lbw_l_addr, cq_lbw_h_addr,
10492 cq_lbw_data_addr, cq_base_l_addr, cq_base_h_addr, cq_size_addr;
10493 u32 val, size, offset;
10494 int dcore_id;
10495
10496 offset = hdev->asic_prop.first_available_cq[0] * 4;
10497 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset;
10498 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + offset;
10499 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + offset;
10500 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + offset;
10501 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + offset;
10502 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + offset;
10503 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 -
10504 (mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + offset);
10505
10506 /* memset dcore0 CQ registers */
10507 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10508 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10509 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10510 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10511 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10512 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10513
10514 cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
10515 cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
10516 cq_lbw_data_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0 + DCORE_OFFSET;
10517 cq_base_l_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_L_0 + DCORE_OFFSET;
10518 cq_base_h_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_BASE_ADDR_H_0 + DCORE_OFFSET;
10519 cq_size_addr = mmDCORE0_SYNC_MNGR_GLBL_CQ_SIZE_LOG2_0 + DCORE_OFFSET;
10520 size = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 - mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0;
10521
10522 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10523 gaudi2_memset_device_lbw(hdev, cq_lbw_l_addr, size, 0);
10524 gaudi2_memset_device_lbw(hdev, cq_lbw_h_addr, size, 0);
10525 gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
10526 gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
10527 gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
10528 gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
10529
10530 cq_lbw_l_addr += DCORE_OFFSET;
10531 cq_lbw_h_addr += DCORE_OFFSET;
10532 cq_lbw_data_addr += DCORE_OFFSET;
10533 cq_base_l_addr += DCORE_OFFSET;
10534 cq_base_h_addr += DCORE_OFFSET;
10535 cq_size_addr += DCORE_OFFSET;
10536 }
10537
10538 offset = hdev->asic_prop.first_available_user_mon[0] * 4;
10539 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset;
10540 val = 1 << DCORE0_SYNC_MNGR_OBJS_MON_STATUS_PROT_SHIFT;
10541 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - (mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + offset);
10542
10543 /* memset dcore0 monitors */
10544 gaudi2_memset_device_lbw(hdev, addr, size, val);
10545
10546 addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + offset;
10547 gaudi2_memset_device_lbw(hdev, addr, size, 0);
10548
10549 mon_sts_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0 + DCORE_OFFSET;
10550 mon_cfg_addr = mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + DCORE_OFFSET;
10551 size = mmDCORE0_SYNC_MNGR_OBJS_SM_SEC_0 - mmDCORE0_SYNC_MNGR_OBJS_MON_STATUS_0;
10552
10553 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10554 gaudi2_memset_device_lbw(hdev, mon_sts_addr, size, val);
10555 gaudi2_memset_device_lbw(hdev, mon_cfg_addr, size, 0);
10556 mon_sts_addr += DCORE_OFFSET;
10557 mon_cfg_addr += DCORE_OFFSET;
10558 }
10559
10560 offset = hdev->asic_prop.first_available_user_sob[0] * 4;
10561 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset;
10562 val = 0;
10563 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 -
10564 (mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10565
10566 /* memset dcore0 sobs */
10567 gaudi2_memset_device_lbw(hdev, addr, size, val);
10568
10569 addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + DCORE_OFFSET;
10570 size = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 - mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0;
10571
10572 for (dcore_id = 1 ; dcore_id < NUM_OF_DCORES ; dcore_id++) {
10573 gaudi2_memset_device_lbw(hdev, addr, size, val);
10574 addr += DCORE_OFFSET;
10575 }
10576
10577 /* Flush all WREG to prevent race */
10578 val = RREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + offset);
10579 }
10580
10581 static void gaudi2_restore_user_qm_registers(struct hl_device *hdev)
10582 {
10583 u32 reg_base, hw_queue_id;
10584
10585 for (hw_queue_id = GAUDI2_QUEUE_ID_PDMA_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_ROT_1_0;
10586 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10587 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10588 continue;
10589
10590 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10591
10592 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10593 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10594 }
10595
10596 /* Flush all WREG to prevent race */
10597 RREG32(mmPDMA0_QM_ARB_CFG_0);
10598 }
10599
10600 static void gaudi2_restore_nic_qm_registers(struct hl_device *hdev)
10601 {
10602 u32 reg_base, hw_queue_id;
10603
10604 for (hw_queue_id = GAUDI2_QUEUE_ID_NIC_0_0 ; hw_queue_id <= GAUDI2_QUEUE_ID_NIC_23_3;
10605 hw_queue_id += NUM_OF_PQ_PER_QMAN) {
10606 if (!gaudi2_is_queue_enabled(hdev, hw_queue_id))
10607 continue;
10608
10609 gaudi2_clear_qm_fence_counters_common(hdev, hw_queue_id, false);
10610
10611 reg_base = gaudi2_qm_blocks_bases[hw_queue_id];
10612 WREG32(reg_base + QM_ARB_CFG_0_OFFSET, 0);
10613 }
10614
10615 /* Flush all WREG to prevent race */
10616 RREG32(mmPDMA0_QM_ARB_CFG_0);
10617 }
10618
10619 static int gaudi2_context_switch(struct hl_device *hdev, u32 asid)
10620 {
10621 return 0;
10622 }
10623
10624 static void gaudi2_restore_phase_topology(struct hl_device *hdev)
10625 {
10626 }
10627
10628 static void gaudi2_init_block_instances(struct hl_device *hdev, u32 block_idx,
10629 struct dup_block_ctx *cfg_ctx)
10630 {
10631 u64 block_base = cfg_ctx->base + block_idx * cfg_ctx->block_off;
10632 u8 seq;
10633 int i;
10634
10635 for (i = 0 ; i < cfg_ctx->instances ; i++) {
10636 seq = block_idx * cfg_ctx->instances + i;
10637
10638 /* skip disabled instance */
10639 if (!(cfg_ctx->enabled_mask & BIT_ULL(seq)))
10640 continue;
10641
10642 cfg_ctx->instance_cfg_fn(hdev, block_base + i * cfg_ctx->instance_off,
10643 cfg_ctx->data);
10644 }
10645 }
10646
10647 static void gaudi2_init_blocks_with_mask(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx,
10648 u64 mask)
10649 {
10650 int i;
10651
10652 cfg_ctx->enabled_mask = mask;
10653
10654 for (i = 0 ; i < cfg_ctx->blocks ; i++)
10655 gaudi2_init_block_instances(hdev, i, cfg_ctx);
10656 }
10657
10658 void gaudi2_init_blocks(struct hl_device *hdev, struct dup_block_ctx *cfg_ctx)
10659 {
10660 gaudi2_init_blocks_with_mask(hdev, cfg_ctx, U64_MAX);
10661 }
10662
10663 static int gaudi2_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr)
10664 {
10665 void *host_mem_virtual_addr;
10666 dma_addr_t host_mem_dma_addr;
10667 u64 reserved_va_base;
10668 u32 pos, size_left, size_to_dma;
10669 struct hl_ctx *ctx;
10670 int rc = 0;
10671
10672 /* Fetch the ctx */
10673 ctx = hl_get_compute_ctx(hdev);
10674 if (!ctx) {
10675 dev_err(hdev->dev, "No ctx available\n");
10676 return -EINVAL;
10677 }
10678
10679 /* Allocate buffers for read and for poll */
10680 host_mem_virtual_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &host_mem_dma_addr,
10681 GFP_KERNEL | __GFP_ZERO);
10682 if (host_mem_virtual_addr == NULL) {
10683 dev_err(hdev->dev, "Failed to allocate memory for KDMA read\n");
10684 rc = -ENOMEM;
10685 goto put_ctx;
10686 }
10687
10688 /* Reserve VM region on asic side */
10689 reserved_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, SZ_2M,
10690 HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10691 if (!reserved_va_base) {
10692 dev_err(hdev->dev, "Failed to reserve vmem on asic\n");
10693 rc = -ENOMEM;
10694 goto free_data_buffer;
10695 }
10696
10697 /* Create mapping on asic side */
10698 mutex_lock(&hdev->mmu_lock);
10699
10700 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M);
10701 if (rc) {
10702 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n");
10703 goto unreserve_va;
10704 }
10705
10706 rc = hl_mmu_invalidate_cache_range(hdev, false,
10707 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV,
10708 ctx->asid, reserved_va_base, SZ_2M);
10709 if (rc) {
10710 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10711 goto unreserve_va;
10712 }
10713
10714 mutex_unlock(&hdev->mmu_lock);
10715
10716 /* Enable MMU on KDMA */
10717 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid);
10718
10719 pos = 0;
10720 size_left = size;
10721 size_to_dma = SZ_2M;
10722
10723 while (size_left > 0) {
10724 if (size_left < SZ_2M)
10725 size_to_dma = size_left;
10726
10727 rc = gaudi2_send_job_to_kdma(hdev, addr, reserved_va_base, size_to_dma, false);
10728 if (rc)
10729 break;
10730
10731 memcpy(blob_addr + pos, host_mem_virtual_addr, size_to_dma);
10732
10733 if (size_left <= SZ_2M)
10734 break;
10735
10736 pos += SZ_2M;
10737 addr += SZ_2M;
10738 size_left -= SZ_2M;
10739 }
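/*
 * Illustrative example (the size is hypothetical): a 5MB read is served as
 * three KDMA transfers of 2MB, 2MB and 1MB through the 2MB bounce buffer,
 * each chunk being copied into blob_addr before the buffer is reused.
 */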
10740
10741 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID);
10742
10743 mutex_lock(&hdev->mmu_lock);
10744
10745 rc = hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M);
10746 if (rc)
10747 goto unreserve_va;
10748
10749 rc = hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR,
10750 ctx->asid, reserved_va_base, SZ_2M);
10751
10752 unreserve_va:
10753 mutex_unlock(&hdev->mmu_lock);
10754 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M);
10755 free_data_buffer:
10756 hl_asic_dma_free_coherent(hdev, SZ_2M, host_mem_virtual_addr, host_mem_dma_addr);
10757 put_ctx:
10758 hl_ctx_put(ctx);
10759
10760 return rc;
10761 }
10762
10763 static int gaudi2_internal_cb_pool_init(struct hl_device *hdev, struct hl_ctx *ctx)
10764 {
10765 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10766 int min_alloc_order, rc;
10767
10768 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10769 return 0;
10770
10771 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev,
10772 HOST_SPACE_INTERNAL_CB_SZ,
10773 &hdev->internal_cb_pool_dma_addr,
10774 GFP_KERNEL | __GFP_ZERO);
10775
10776 if (!hdev->internal_cb_pool_virt_addr)
10777 return -ENOMEM;
10778
10779 min_alloc_order = ilog2(min(gaudi2_get_signal_cb_size(hdev),
10780 gaudi2_get_wait_cb_size(hdev)));
10781
10782 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1);
10783 if (!hdev->internal_cb_pool) {
10784 dev_err(hdev->dev, "Failed to create internal CB pool\n");
10785 rc = -ENOMEM;
10786 goto free_internal_cb_pool;
10787 }
10788
10789 rc = gen_pool_add(hdev->internal_cb_pool, (uintptr_t) hdev->internal_cb_pool_virt_addr,
10790 HOST_SPACE_INTERNAL_CB_SZ, -1);
10791 if (rc) {
10792 dev_err(hdev->dev, "Failed to add memory to internal CB pool\n");
10793 rc = -EFAULT;
10794 goto destroy_internal_cb_pool;
10795 }
10796
10797 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
10798 HOST_SPACE_INTERNAL_CB_SZ, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
10799
10800 if (!hdev->internal_cb_va_base) {
10801 rc = -ENOMEM;
10802 goto destroy_internal_cb_pool;
10803 }
10804
10805 mutex_lock(&hdev->mmu_lock);
10806
10807 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr,
10808 HOST_SPACE_INTERNAL_CB_SZ);
10809 if (rc)
10810 goto unreserve_internal_cb_pool;
10811
10812 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR);
10813 if (rc)
10814 goto unmap_internal_cb_pool;
10815
10816 mutex_unlock(&hdev->mmu_lock);
10817
10818 return 0;
10819
10820 unmap_internal_cb_pool:
10821 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10822 unreserve_internal_cb_pool:
10823 mutex_unlock(&hdev->mmu_lock);
10824 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10825 destroy_internal_cb_pool:
10826 gen_pool_destroy(hdev->internal_cb_pool);
10827 free_internal_cb_pool:
10828 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10829 hdev->internal_cb_pool_dma_addr);
10830
10831 return rc;
10832 }
10833
10834 static void gaudi2_internal_cb_pool_fini(struct hl_device *hdev, struct hl_ctx *ctx)
10835 {
10836 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10837
10838 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU))
10839 return;
10840
10841 mutex_lock(&hdev->mmu_lock);
10842 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10843 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ);
10844 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
10845 mutex_unlock(&hdev->mmu_lock);
10846
10847 gen_pool_destroy(hdev->internal_cb_pool);
10848
10849 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr,
10850 hdev->internal_cb_pool_dma_addr);
10851 }
10852
10853 static void gaudi2_restore_user_registers(struct hl_device *hdev)
10854 {
10855 gaudi2_restore_user_sm_registers(hdev);
10856 gaudi2_restore_user_qm_registers(hdev);
10857 }
10858
10859 static int gaudi2_map_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10860 {
10861 struct hl_device *hdev = ctx->hdev;
10862 struct asic_fixed_properties *prop = &hdev->asic_prop;
10863 struct gaudi2_device *gaudi2 = hdev->asic_specific;
10864 int rc;
10865
10866 rc = hl_mmu_map_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10867 gaudi2->virt_msix_db_dma_addr, prop->pmmu.page_size, true);
10868 if (rc)
10869 dev_err(hdev->dev, "Failed to map VA %#llx for virtual MSI-X doorbell memory\n",
10870 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10871
10872 return rc;
10873 }
10874
10875 static void gaudi2_unmap_virtual_msix_doorbell_memory(struct hl_ctx *ctx)
10876 {
10877 struct hl_device *hdev = ctx->hdev;
10878 struct asic_fixed_properties *prop = &hdev->asic_prop;
10879 int rc;
10880
10881 rc = hl_mmu_unmap_page(ctx, RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START,
10882 prop->pmmu.page_size, true);
10883 if (rc)
10884 dev_err(hdev->dev, "Failed to unmap VA %#llx of virtual MSI-X doorbell memory\n",
10885 RESERVED_VA_FOR_VIRTUAL_MSIX_DOORBELL_START);
10886 }
10887
10888 static int gaudi2_ctx_init(struct hl_ctx *ctx)
10889 {
10890 int rc;
10891
10892 if (ctx->asid == HL_KERNEL_ASID_ID)
10893 return 0;
10894
10895 rc = gaudi2_mmu_prepare(ctx->hdev, ctx->asid);
10896 if (rc)
10897 return rc;
10898
10899 /* No need to clear user registers if the device has just
10900 * performed a reset; we restore only the NIC QM registers
10901 */
10902 if (ctx->hdev->reset_upon_device_release)
10903 gaudi2_restore_nic_qm_registers(ctx->hdev);
10904 else
10905 gaudi2_restore_user_registers(ctx->hdev);
10906
10907 rc = gaudi2_internal_cb_pool_init(ctx->hdev, ctx);
10908 if (rc)
10909 return rc;
10910
10911 rc = gaudi2_map_virtual_msix_doorbell_memory(ctx);
10912 if (rc)
10913 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10914
10915 return rc;
10916 }
10917
10918 static void gaudi2_ctx_fini(struct hl_ctx *ctx)
10919 {
10920 if (ctx->asid == HL_KERNEL_ASID_ID)
10921 return;
10922
10923 gaudi2_internal_cb_pool_fini(ctx->hdev, ctx);
10924
10925 gaudi2_unmap_virtual_msix_doorbell_memory(ctx);
10926 }
10927
10928 static int gaudi2_pre_schedule_cs(struct hl_cs *cs)
10929 {
10930 struct hl_device *hdev = cs->ctx->hdev;
10931 int index = cs->sequence & (hdev->asic_prop.max_pending_cs - 1);
10932 u32 mon_payload, sob_id, mon_id;
10933
10934 if (!cs_needs_completion(cs))
10935 return 0;
10936
10937 /*
10938 * First 64 SOB/MON are reserved for the driver for the QMAN auto-completion
10939 * mechanism. Each SOB/MON pair is used for a pending CS with the same
10940 * cyclic index. The SOB value is increased when each of the CS jobs is
10941 * completed. When the SOB reaches the number of CS jobs, the monitor
10942 * generates an MSI-X interrupt.
10943 */
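/*
 * Illustrative example (values are hypothetical): a CS whose cyclic index is 5
 * and which carries 3 jobs uses sob_id = mon_id = 5; the monitor armed below
 * fires once SOB 5 reaches 3, and the payload it writes is
 * (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) | (1 << CQ_ENTRY_READY_SHIFT) | 5.
 */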
10944
10945 sob_id = mon_id = index;
10946 mon_payload = (1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
10947 (1 << CQ_ENTRY_READY_SHIFT) | index;
10948
10949 gaudi2_arm_cq_monitor(hdev, sob_id, mon_id, GAUDI2_RESERVED_CQ_CS_COMPLETION, mon_payload,
10950 cs->jobs_cnt);
10951
10952 return 0;
10953 }
10954
10955 static u32 gaudi2_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
10956 {
10957 return HL_INVALID_QUEUE;
10958 }
10959
10960 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb)
10961 {
10962 struct hl_cb *cb = data;
10963 struct packet_msg_short *pkt;
10964 u32 value, ctl, pkt_size = sizeof(*pkt);
10965
10966 pkt = (struct packet_msg_short *) (uintptr_t) (cb->kernel_address + size);
10967 memset(pkt, 0, pkt_size);
10968
10969 /* Inc by 1, Mode ADD */
10970 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1);
10971 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_SOB_MOD_MASK, 1);
10972
10973 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
10974 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 1); /* SOB base */
10975 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10976 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, eb);
10977 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
10978
10979 pkt->value = cpu_to_le32(value);
10980 pkt->ctl = cpu_to_le32(ctl);
10981
10982 return size + pkt_size;
10983 }
10984
10985 static u32 gaudi2_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, u16 addr)
10986 {
10987 u32 ctl, pkt_size = sizeof(*pkt);
10988
10989 memset(pkt, 0, pkt_size);
10990
10991 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
10992 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
10993 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
10994 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
10995 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 0);
10996
10997 pkt->value = cpu_to_le32(value);
10998 pkt->ctl = cpu_to_le32(ctl);
10999
11000 return pkt_size;
11001 }
11002
11003 static u32 gaudi2_add_arm_monitor_pkt(struct hl_device *hdev, struct packet_msg_short *pkt,
11004 u16 sob_base, u8 sob_mask, u16 sob_val, u16 addr)
11005 {
11006 u32 ctl, value, pkt_size = sizeof(*pkt);
11007 u8 mask;
11008
11009 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) {
11010 dev_err(hdev->dev, "sob_base %u (mask %#x) is not valid\n", sob_base, sob_mask);
11011 return 0;
11012 }
11013
11014 memset(pkt, 0, pkt_size);
11015
11016 value = FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8);
11017 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val);
11018 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MODE_MASK, 0); /* GREATER OR EQUAL */
11019 value |= FIELD_PREP(GAUDI2_PKT_SHORT_VAL_MON_MASK_MASK, mask);
11020
11021 ctl = FIELD_PREP(GAUDI2_PKT_SHORT_CTL_ADDR_MASK, addr);
11022 ctl |= FIELD_PREP(GAUDI2_PKT_SHORT_CTL_BASE_MASK, 0); /* MON base */
11023 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
11024 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
11025 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
11026
11027 pkt->value = cpu_to_le32(value);
11028 pkt->ctl = cpu_to_le32(ctl);
11029
11030 return pkt_size;
11031 }
11032
11033 static u32 gaudi2_add_fence_pkt(struct packet_fence *pkt)
11034 {
11035 u32 ctl, cfg, pkt_size = sizeof(*pkt);
11036
11037 memset(pkt, 0, pkt_size);
11038
11039 cfg = FIELD_PREP(GAUDI2_PKT_FENCE_CFG_DEC_VAL_MASK, 1);
11040 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
11041 cfg |= FIELD_PREP(GAUDI2_PKT_FENCE_CFG_ID_MASK, 2);
11042
11043 ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
11044 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 0);
11045 ctl |= FIELD_PREP(GAUDI2_PKT_CTL_MB_MASK, 1);
11046
11047 pkt->cfg = cpu_to_le32(cfg);
11048 pkt->ctl = cpu_to_le32(ctl);
11049
11050 return pkt_size;
11051 }
11052
11053 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop)
11054 {
11055 struct hl_cb *cb = prop->data;
11056 void *buf = (void *) (uintptr_t) (cb->kernel_address);
11057
11058 u64 monitor_base, fence_addr = 0;
11059 u32 stream_index, size = prop->size;
11060 u16 msg_addr_offset;
11061
11062 stream_index = prop->q_idx % 4;
11063 fence_addr = CFG_BASE + gaudi2_qm_blocks_bases[prop->q_idx] +
11064 QM_FENCE2_OFFSET + stream_index * 4;
11065
11066 /*
11067 * monitor_base should be the content of the base0 address registers,
11068 * so it will be added to the msg short offsets
11069 */
11070 monitor_base = mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
11071
11072 /* First monitor config packet: low address of the sync */
11073 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + prop->mon_id * 4) -
11074 monitor_base;
11075
11076 size += gaudi2_add_mon_msg_short(buf + size, (u32) fence_addr, msg_addr_offset);
11077
11078 /* Second monitor config packet: high address of the sync */
11079 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + prop->mon_id * 4) -
11080 monitor_base;
11081
11082 size += gaudi2_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32), msg_addr_offset);
11083
11084 /*
11085 * Third monitor config packet: the payload, i.e. what to write when the
11086 * sync triggers
11087 */
11088 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + prop->mon_id * 4) -
11089 monitor_base;
11090
11091 size += gaudi2_add_mon_msg_short(buf + size, 1, msg_addr_offset);
11092
11093 /* Fourth monitor config packet: bind the monitor to a sync object */
11094 msg_addr_offset = (mmDCORE0_SYNC_MNGR_OBJS_MON_ARM_0 + prop->mon_id * 4) - monitor_base;
11095
11096 size += gaudi2_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base, prop->sob_mask,
11097 prop->sob_val, msg_addr_offset);
11098
11099 /* Fence packet */
11100 size += gaudi2_add_fence_pkt(buf + size);
11101
11102 return size;
11103 }
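/*
 * A rough sketch of the resulting CB layout (assuming a valid SOB mask, so
 * gaudi2_add_arm_monitor_pkt does not return 0): four MSG_SHORT packets that
 * program MON_PAY_ADDRL, MON_PAY_ADDRH, MON_PAY_DATA and MON_ARM for
 * prop->mon_id, followed by one FENCE packet, i.e. the returned size is
 * prop->size + 4 * sizeof(struct packet_msg_short) + sizeof(struct packet_fence).
 */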
11104
11105 static void gaudi2_reset_sob(struct hl_device *hdev, void *data)
11106 {
11107 struct hl_hw_sob *hw_sob = data;
11108
11109 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id);
11110
11111 WREG32(mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4, 0);
11112
11113 kref_init(&hw_sob->kref);
11114 }
11115
11116 static void gaudi2_reset_sob_group(struct hl_device *hdev, u16 sob_group)
11117 {
11118 }
11119
11120 static u64 gaudi2_get_device_time(struct hl_device *hdev)
11121 {
11122 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
11123
11124 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
11125 }
11126
11127 static int gaudi2_collective_wait_init_cs(struct hl_cs *cs)
11128 {
11129 return 0;
11130 }
11131
11132 static int gaudi2_collective_wait_create_jobs(struct hl_device *hdev, struct hl_ctx *ctx,
11133 struct hl_cs *cs, u32 wait_queue_id,
11134 u32 collective_engine_id, u32 encaps_signal_offset)
11135 {
11136 return -EINVAL;
11137 }
11138
11139 /*
11140 * gaudi2_mmu_scramble_addr - converts a DRAM (non power of 2) page-size aligned
11141 * address to a DMMU page-size (64MB) address before mapping it in
11142 * the MMU.
11143 * The operation is performed on both the virtual and physical addresses.
11144 * For a device with 6 HBMs the scramble is:
11145 * (addr[47:0] / 48M) * 64M + addr % 48M + addr[63:48]
11146 *
11147 * Example:
11148 * =============================================================================
11149 * Allocated DRAM   Reserved VA      Scrambled VA for MMU mapping     Scrambled PA
11150 *  Phys address                                                      in MMU last
11151 *                                                                    HOP
11152 * =============================================================================
11153 * PA1 0x3000000 VA1 0x9C000000 SVA1= (VA1/48M)*64M 0xD0000000 <- PA1/48M 0x1
11154 * PA2 0x9000000 VA2 0x9F000000 SVA2= (VA2/48M)*64M 0xD4000000 <- PA2/48M 0x3
11155 * =============================================================================
11156 */
11157 static u64 gaudi2_mmu_scramble_addr(struct hl_device *hdev, u64 raw_addr)
11158 {
11159 struct asic_fixed_properties *prop = &hdev->asic_prop;
11160 u32 divisor, mod_va;
11161 u64 div_va;
11162
11163 /* accept any address in the DRAM address space */
11164 if (hl_mem_area_inside_range(raw_addr, sizeof(raw_addr), DRAM_PHYS_BASE,
11165 VA_HBM_SPACE_END)) {
11166
11167 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
11168 div_va = div_u64_rem(raw_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, divisor, &mod_va);
11169 return (raw_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) |
11170 (div_va << GAUDI2_HBM_MMU_SCRM_DIV_SHIFT) |
11171 (mod_va << GAUDI2_HBM_MMU_SCRM_MOD_SHIFT);
11172 }
11173
11174 return raw_addr;
11175 }
11176
11177 static u64 gaudi2_mmu_descramble_addr(struct hl_device *hdev, u64 scrambled_addr)
11178 {
11179 struct asic_fixed_properties *prop = &hdev->asic_prop;
11180 u32 divisor, mod_va;
11181 u64 div_va;
11182
11183 /* accept any address in the DRAM address space */
11184 if (hl_mem_area_inside_range(scrambled_addr, sizeof(scrambled_addr), DRAM_PHYS_BASE,
11185 VA_HBM_SPACE_END)) {
11186
11187 divisor = prop->num_functional_hbms * GAUDI2_HBM_MMU_SCRM_MEM_SIZE;
11188 div_va = div_u64_rem(scrambled_addr & GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK,
11189 PAGE_SIZE_64MB, &mod_va);
11190
11191 return ((scrambled_addr & ~GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK) +
11192 (div_va * divisor + mod_va));
11193 }
11194
11195 return scrambled_addr;
11196 }
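/*
 * Worked round-trip example, reusing the 6-HBM figures from the comment above
 * gaudi2_mmu_scramble_addr (48MB divisor, 64MB DMMU page) and ignoring the bits
 * outside GAUDI2_HBM_MMU_SCRM_ADDRESS_MASK, which pass through unchanged:
 * scrambling 0x9D000000 gives (0x9D000000 / 48M) * 64M + (0x9D000000 % 48M) =
 * 52 * 64M + 16M = 0xD1000000, and descrambling 0xD1000000 gives
 * (0xD1000000 / 64M) * 48M + (0xD1000000 % 64M) = 52 * 48M + 16M = 0x9D000000.
 */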
11197
11198 static u32 gaudi2_get_dec_base_addr(struct hl_device *hdev, u32 core_id)
11199 {
11200 u32 base = 0, dcore_id, dec_id;
11201
11202 if (core_id >= NUMBER_OF_DEC) {
11203 dev_err(hdev->dev, "Unexpected core number %d for DEC\n", core_id);
11204 goto out;
11205 }
11206
11207 if (core_id < 8) {
11208 dcore_id = core_id / NUM_OF_DEC_PER_DCORE;
11209 dec_id = core_id % NUM_OF_DEC_PER_DCORE;
11210
11211 base = mmDCORE0_DEC0_CMD_BASE + dcore_id * DCORE_OFFSET +
11212 dec_id * DCORE_VDEC_OFFSET;
11213 } else {
11214 /* PCIe Shared Decoder */
11215 base = mmPCIE_DEC0_CMD_BASE + ((core_id % 8) * PCIE_VDEC_OFFSET);
11216 }
11217 out:
11218 return base;
11219 }
11220
11221 static int gaudi2_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
11222 u32 *block_size, u32 *block_id)
11223 {
11224 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11225 int i;
11226
11227 for (i = 0 ; i < NUM_USER_MAPPED_BLOCKS ; i++) {
11228 if (block_addr == CFG_BASE + gaudi2->mapped_blocks[i].address) {
11229 *block_id = i;
11230 if (block_size)
11231 *block_size = gaudi2->mapped_blocks[i].size;
11232 return 0;
11233 }
11234 }
11235
11236 dev_err(hdev->dev, "Invalid block address %#llx", block_addr);
11237
11238 return -EINVAL;
11239 }
11240
11241 static int gaudi2_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
11242 u32 block_id, u32 block_size)
11243 {
11244 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11245 u64 offset_in_bar;
11246 u64 address;
11247 int rc;
11248
11249 if (block_id >= NUM_USER_MAPPED_BLOCKS) {
11250 dev_err(hdev->dev, "Invalid block id %u", block_id);
11251 return -EINVAL;
11252 }
11253
11254 /* we allow mapping only an entire block */
11255 if (block_size != gaudi2->mapped_blocks[block_id].size) {
11256 dev_err(hdev->dev, "Invalid block size %u", block_size);
11257 return -EINVAL;
11258 }
11259
11260 offset_in_bar = CFG_BASE + gaudi2->mapped_blocks[block_id].address - STM_FLASH_BASE_ADDR;
11261
11262 address = pci_resource_start(hdev->pdev, SRAM_CFG_BAR_ID) + offset_in_bar;
11263
11264 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
11265 VM_DONTCOPY | VM_NORESERVE);
11266
11267 rc = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT,
11268 block_size, vma->vm_page_prot);
11269 if (rc)
11270 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
11271
11272 return rc;
11273 }
11274
11275 static void gaudi2_enable_events_from_fw(struct hl_device *hdev)
11276 {
11277 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11278
11279 struct cpu_dyn_regs *dyn_regs = &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
11280 u32 irq_handler_offset = le32_to_cpu(dyn_regs->gic_host_ints_irq);
11281
11282 if (gaudi2->hw_cap_initialized & HW_CAP_CPU_Q)
11283 WREG32(irq_handler_offset,
11284 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_INTS_REGISTER].cpu_id);
11285 }
11286
11287 static int gaudi2_get_mmu_base(struct hl_device *hdev, u64 mmu_id, u32 *mmu_base)
11288 {
11289 switch (mmu_id) {
11290 case HW_CAP_DCORE0_DMMU0:
11291 *mmu_base = mmDCORE0_HMMU0_MMU_BASE;
11292 break;
11293 case HW_CAP_DCORE0_DMMU1:
11294 *mmu_base = mmDCORE0_HMMU1_MMU_BASE;
11295 break;
11296 case HW_CAP_DCORE0_DMMU2:
11297 *mmu_base = mmDCORE0_HMMU2_MMU_BASE;
11298 break;
11299 case HW_CAP_DCORE0_DMMU3:
11300 *mmu_base = mmDCORE0_HMMU3_MMU_BASE;
11301 break;
11302 case HW_CAP_DCORE1_DMMU0:
11303 *mmu_base = mmDCORE1_HMMU0_MMU_BASE;
11304 break;
11305 case HW_CAP_DCORE1_DMMU1:
11306 *mmu_base = mmDCORE1_HMMU1_MMU_BASE;
11307 break;
11308 case HW_CAP_DCORE1_DMMU2:
11309 *mmu_base = mmDCORE1_HMMU2_MMU_BASE;
11310 break;
11311 case HW_CAP_DCORE1_DMMU3:
11312 *mmu_base = mmDCORE1_HMMU3_MMU_BASE;
11313 break;
11314 case HW_CAP_DCORE2_DMMU0:
11315 *mmu_base = mmDCORE2_HMMU0_MMU_BASE;
11316 break;
11317 case HW_CAP_DCORE2_DMMU1:
11318 *mmu_base = mmDCORE2_HMMU1_MMU_BASE;
11319 break;
11320 case HW_CAP_DCORE2_DMMU2:
11321 *mmu_base = mmDCORE2_HMMU2_MMU_BASE;
11322 break;
11323 case HW_CAP_DCORE2_DMMU3:
11324 *mmu_base = mmDCORE2_HMMU3_MMU_BASE;
11325 break;
11326 case HW_CAP_DCORE3_DMMU0:
11327 *mmu_base = mmDCORE3_HMMU0_MMU_BASE;
11328 break;
11329 case HW_CAP_DCORE3_DMMU1:
11330 *mmu_base = mmDCORE3_HMMU1_MMU_BASE;
11331 break;
11332 case HW_CAP_DCORE3_DMMU2:
11333 *mmu_base = mmDCORE3_HMMU2_MMU_BASE;
11334 break;
11335 case HW_CAP_DCORE3_DMMU3:
11336 *mmu_base = mmDCORE3_HMMU3_MMU_BASE;
11337 break;
11338 case HW_CAP_PMMU:
11339 *mmu_base = mmPMMU_HBW_MMU_BASE;
11340 break;
11341 default:
11342 return -EINVAL;
11343 }
11344
11345 return 0;
11346 }
11347
11348 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
11349 {
11350 bool is_pmmu = (mmu_id == HW_CAP_PMMU);
11351 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11352 u32 mmu_base;
11353
11354 if (!(gaudi2->hw_cap_initialized & mmu_id))
11355 return;
11356
11357 if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
11358 return;
11359
11360 gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
11361 gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
11362 }
11363
11364 static int gaudi2_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
11365 {
11366 u32 i, mmu_id, num_of_hmmus = NUM_OF_HMMU_PER_DCORE * NUM_OF_DCORES;
11367
11368 /* check all HMMUs */
11369 for (i = 0 ; i < num_of_hmmus ; i++) {
11370 mmu_id = HW_CAP_DCORE0_DMMU0 << i;
11371
11372 if (mmu_cap_mask & mmu_id)
11373 gaudi2_ack_mmu_error(hdev, mmu_id);
11374 }
11375
11376 /* check PMMU */
11377 if (mmu_cap_mask & HW_CAP_PMMU)
11378 gaudi2_ack_mmu_error(hdev, HW_CAP_PMMU);
11379
11380 return 0;
11381 }
11382
11383 static void gaudi2_get_msi_info(__le32 *table)
11384 {
11385 table[CPUCP_EVENT_QUEUE_MSI_TYPE] = cpu_to_le32(GAUDI2_EVENT_QUEUE_MSIX_IDX);
11386 table[CPUCP_EVENT_QUEUE_ERR_MSI_TYPE] = cpu_to_le32(GAUDI2_IRQ_NUM_EQ_ERROR);
11387 }
11388
11389 static int gaudi2_map_pll_idx_to_fw_idx(u32 pll_idx)
11390 {
11391 switch (pll_idx) {
11392 case HL_GAUDI2_CPU_PLL: return CPU_PLL;
11393 case HL_GAUDI2_PCI_PLL: return PCI_PLL;
11394 case HL_GAUDI2_NIC_PLL: return NIC_PLL;
11395 case HL_GAUDI2_DMA_PLL: return DMA_PLL;
11396 case HL_GAUDI2_MESH_PLL: return MESH_PLL;
11397 case HL_GAUDI2_MME_PLL: return MME_PLL;
11398 case HL_GAUDI2_TPC_PLL: return TPC_PLL;
11399 case HL_GAUDI2_IF_PLL: return IF_PLL;
11400 case HL_GAUDI2_SRAM_PLL: return SRAM_PLL;
11401 case HL_GAUDI2_HBM_PLL: return HBM_PLL;
11402 case HL_GAUDI2_VID_PLL: return VID_PLL;
11403 case HL_GAUDI2_MSS_PLL: return MSS_PLL;
11404 default: return -EINVAL;
11405 }
11406 }
11407
11408 static int gaudi2_gen_sync_to_engine_map(struct hl_device *hdev, struct hl_sync_to_engine_map *map)
11409 {
11410 /* Not implemented */
11411 return 0;
11412 }
11413
11414 static int gaudi2_monitor_valid(struct hl_mon_state_dump *mon)
11415 {
11416 /* Not implemented */
11417 return 0;
11418 }
11419
11420 static int gaudi2_print_single_monitor(char **buf, size_t *size, size_t *offset,
11421 struct hl_device *hdev, struct hl_mon_state_dump *mon)
11422 {
11423 /* Not implemented */
11424 return 0;
11425 }
11426
11427
11428 static int gaudi2_print_fences_single_engine(struct hl_device *hdev, u64 base_offset,
11429 u64 status_base_offset, enum hl_sync_engine_type engine_type,
11430 u32 engine_id, char **buf, size_t *size, size_t *offset)
11431 {
11432 /* Not implemented */
11433 return 0;
11434 }
11435
11436
11437 static struct hl_state_dump_specs_funcs gaudi2_state_dump_funcs = {
11438 .monitor_valid = gaudi2_monitor_valid,
11439 .print_single_monitor = gaudi2_print_single_monitor,
11440 .gen_sync_to_engine_map = gaudi2_gen_sync_to_engine_map,
11441 .print_fences_single_engine = gaudi2_print_fences_single_engine,
11442 };
11443
11444 static void gaudi2_state_dump_init(struct hl_device *hdev)
11445 {
11446 /* Not implemented */
11447 hdev->state_dump_specs.props = gaudi2_state_dump_specs_props;
11448 hdev->state_dump_specs.funcs = gaudi2_state_dump_funcs;
11449 }
11450
11451 static u32 gaudi2_get_sob_addr(struct hl_device *hdev, u32 sob_id)
11452 {
11453 return 0;
11454 }
11455
11456 static u32 *gaudi2_get_stream_master_qid_arr(void)
11457 {
11458 return NULL;
11459 }
11460
11461 static void gaudi2_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
11462 struct attribute_group *dev_vrm_attr_grp)
11463 {
11464 hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
11465 hl_sysfs_add_dev_vrm_attr(hdev, dev_vrm_attr_grp);
11466 }
11467
11468 static int gaudi2_mmu_get_real_page_size(struct hl_device *hdev, struct hl_mmu_properties *mmu_prop,
11469 u32 page_size, u32 *real_page_size, bool is_dram_addr)
11470 {
11471 struct asic_fixed_properties *prop = &hdev->asic_prop;
11472
11473 /* for host pages the page size must be a multiple of the MMU page size */
11474 if (!is_dram_addr) {
11475 if (page_size % mmu_prop->page_size)
11476 goto page_size_err;
11477
11478 *real_page_size = mmu_prop->page_size;
11479 return 0;
11480 }
11481
11482 if ((page_size % prop->dram_page_size) || (prop->dram_page_size > mmu_prop->page_size))
11483 goto page_size_err;
11484
11485 /*
11486 * MMU page size is different from DRAM page size (more precisely, DMMU page is greater
11487 * than DRAM page size).
11488 * For this reason, work with the DRAM page size and let the MMU scrambling routine
11489 * handle this mismatch when calculating the address to place in the MMU page table.
11490 * (In that case, also make sure that the dram_page_size is not greater than the
11491 * MMU page size.)
11492 */
11493 *real_page_size = prop->dram_page_size;
11494
11495 return 0;
11496
11497 page_size_err:
11498 dev_err(hdev->dev, "page size of %u is not %uKB aligned, can't map\n",
11499 page_size, mmu_prop->page_size >> 10);
11500 return -EFAULT;
11501 }
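/*
 * Illustrative example, reusing the 6-HBM figures from the scramble comment
 * above (48MB DRAM pages, 64MB DMMU pages): a 96MB mapping request passes the
 * checks above and is performed as two 48MB "real" pages; the MMU scrambling
 * routine later places each 48MB page on a 64MB-aligned DMMU page when the
 * address for the page table is computed.
 */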
11502
11503 static int gaudi2_get_monitor_dump(struct hl_device *hdev, void *data)
11504 {
11505 return -EOPNOTSUPP;
11506 }
11507
11508 int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
11509 {
11510 struct gaudi2_device *gaudi2 = hdev->asic_specific;
11511
11512 if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
11513 return 0;
11514
11515 return hl_fw_send_device_activity(hdev, open);
11516 }
11517
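/*
 * Read a page-table entry directly through the DRAM BAR. The address is
 * translated relative to the BAR's current base, and U64_MAX is returned if a
 * hard reset is pending.
 */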
static u64 gaudi2_read_pte(struct hl_device *hdev, u64 addr)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;
	u64 val;

	if (hdev->reset_info.hard_reset_pending)
		return U64_MAX;

	val = readq(hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));

	return val;
}

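/* Write a page-table entry through the DRAM BAR; skipped while a hard reset is pending */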
static void gaudi2_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
	struct gaudi2_device *gaudi2 = hdev->asic_specific;

	if (hdev->reset_info.hard_reset_pending)
		return;

	writeq(val, hdev->pcie_bar[DRAM_BAR_ID] + (addr - gaudi2->dram_bar_cur_addr));
}

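/*
 * Gaudi2 implementation of the common ASIC function table. Entries set to
 * NULL are operations that are either not needed or not supported on Gaudi2.
 */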
static const struct hl_asic_funcs gaudi2_funcs = {
	.early_init = gaudi2_early_init,
	.early_fini = gaudi2_early_fini,
	.late_init = gaudi2_late_init,
	.late_fini = gaudi2_late_fini,
	.sw_init = gaudi2_sw_init,
	.sw_fini = gaudi2_sw_fini,
	.hw_init = gaudi2_hw_init,
	.hw_fini = gaudi2_hw_fini,
	.halt_engines = gaudi2_halt_engines,
	.suspend = gaudi2_suspend,
	.resume = gaudi2_resume,
	.mmap = gaudi2_mmap,
	.ring_doorbell = gaudi2_ring_doorbell,
	.pqe_write = gaudi2_pqe_write,
	.asic_dma_alloc_coherent = gaudi2_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi2_dma_free_coherent,
	.scrub_device_mem = gaudi2_scrub_device_mem,
	.scrub_device_dram = gaudi2_scrub_device_dram,
	.get_int_queue_base = NULL,
	.test_queues = gaudi2_test_queues,
	.asic_dma_pool_zalloc = gaudi2_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi2_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi2_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi2_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi2_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = NULL,
	.update_eq_ci = gaudi2_update_eq_ci,
	.context_switch = gaudi2_context_switch,
	.restore_phase_topology = gaudi2_restore_phase_topology,
	.debugfs_read_dma = gaudi2_debugfs_read_dma,
	.add_device_attr = gaudi2_add_device_attr,
	.handle_eqe = gaudi2_handle_eqe,
	.get_events_stat = gaudi2_get_events_stat,
	.read_pte = gaudi2_read_pte,
	.write_pte = gaudi2_write_pte,
	.mmu_invalidate_cache = gaudi2_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi2_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi2_send_heartbeat,
	.debug_coresight = gaudi2_debug_coresight,
	.is_device_idle = gaudi2_is_device_idle,
	.compute_reset_late_init = gaudi2_compute_reset_late_init,
	.hw_queues_lock = gaudi2_hw_queues_lock,
	.hw_queues_unlock = gaudi2_hw_queues_unlock,
	.get_pci_id = gaudi2_get_pci_id,
	.get_eeprom_data = gaudi2_get_eeprom_data,
	.get_monitor_dump = gaudi2_get_monitor_dump,
	.send_cpu_message = gaudi2_send_cpu_message,
	.pci_bars_map = gaudi2_pci_bars_map,
	.init_iatu = gaudi2_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi2_halt_coresight,
	.ctx_init = gaudi2_ctx_init,
	.ctx_fini = gaudi2_ctx_fini,
	.pre_schedule_cs = gaudi2_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi2_get_queue_id_for_cq,
	.load_firmware_to_device = NULL,
	.load_boot_fit_to_device = NULL,
	.get_signal_cb_size = gaudi2_get_signal_cb_size,
	.get_wait_cb_size = gaudi2_get_wait_cb_size,
	.gen_signal_cb = gaudi2_gen_signal_cb,
	.gen_wait_cb = gaudi2_gen_wait_cb,
	.reset_sob = gaudi2_reset_sob,
	.reset_sob_group = gaudi2_reset_sob_group,
	.get_device_time = gaudi2_get_device_time,
	.pb_print_security_errors = gaudi2_pb_print_security_errors,
	.collective_wait_init_cs = gaudi2_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi2_collective_wait_create_jobs,
	.get_dec_base_addr = gaudi2_get_dec_base_addr,
	.scramble_addr = gaudi2_mmu_scramble_addr,
	.descramble_addr = gaudi2_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi2_ack_protection_bits_errors,
	.get_hw_block_id = gaudi2_get_hw_block_id,
	.hw_block_mmap = gaudi2_block_mmap,
	.enable_events_from_fw = gaudi2_enable_events_from_fw,
	.ack_mmu_errors = gaudi2_ack_mmu_page_fault_or_access_error,
	.get_msi_info = gaudi2_get_msi_info,
	.map_pll_idx_to_fw_idx = gaudi2_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi2_init_firmware_preload_params,
	.init_firmware_loader = gaudi2_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi2_init_scrambler_hbm,
	.state_dump_init = gaudi2_state_dump_init,
	.get_sob_addr = gaudi2_get_sob_addr,
	.set_pci_memory_regions = gaudi2_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi2_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi2_check_if_razwi_happened,
	.mmu_get_real_page_size = gaudi2_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi2_set_hbm_bar_base,
	.set_engine_cores = gaudi2_set_engine_cores,
	.set_engines = gaudi2_set_engines,
	.send_device_activity = gaudi2_send_device_activity,
	.set_dram_properties = gaudi2_set_dram_properties,
	.set_binning_masks = gaudi2_set_binning_masks,
};

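/* Install the Gaudi2 ASIC function table on the device */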
void gaudi2_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi2_funcs;
}
