// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256
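
/*
 * Queue IDs that can serve as a stream master: the four streams of each of
 * the two PCI DMA QMANs (DMA 0 and DMA 1).
 */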
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
"FENCE 3 inc over max value and clipped", 221 "FENCE 0 dec under min value and clipped", 222 "FENCE 1 dec under min value and clipped", 223 "FENCE 2 dec under min value and clipped", 224 "FENCE 3 dec under min value and clipped" 225 }; 226 227 static const char * const 228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 229 "Choice push while full error", 230 "Choice Q watchdog error", 231 "MSG AXI LBW returned with error" 232 }; 233 234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 243 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 296 QUEUE_TYPE_INT, /* 
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
"SYNC_OBJ_ENGINE_SEM_TPC_6" }, 368 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, 369 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, 370 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, 371 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 372 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 373 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 374 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 375 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 376 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 377 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 378 }; 379 380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 381 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 382 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 383 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 384 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 385 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 386 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 387 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 388 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 389 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 390 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 391 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 392 }; 393 394 static s64 gaudi_state_dump_specs_props[] = { 395 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 396 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 397 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 398 [SP_MON_OBJ_WR_ADDR_LOW] = 399 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 400 [SP_MON_OBJ_WR_ADDR_HIGH] = 401 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 402 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 403 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 404 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 405 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 406 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 407 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 408 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 409 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 410 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 411 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 412 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 413 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 414 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 415 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 416 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 417 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 418 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 419 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 420 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 421 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 422 [SP_FENCE0_CNT_OFFSET] = 423 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 424 [SP_FENCE0_RDATA_OFFSET] = 425 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 426 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 427 [SP_NUM_CORES] = 1, 428 }; 429 430 static const int gaudi_queue_id_to_engine_id[] = { 431 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, 432 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, 433 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, 434 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, 435 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, 436 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, 437 
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
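
	/*
	 * Note: the host VA range is split in half - the lower half is handled
	 * by the PMMU (and its huge-page copy), the upper half by the DMMU,
	 * as reflected by the start_addr/end_addr assignments above and below.
	 */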
	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.pgt_size = prop->mmu_pgt_size;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be
	 * aligned to 8KB
	 */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before starting to go over the jobs
		 * of the master/slaves. the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job, depending on each
		 * job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value, which is
	 * changed by the signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
1393 */ 1394 kref_put(&cprop->hw_sob_group[sob_group_offset].kref, 1395 gaudi_sob_group_reset_error); 1396 cprop->next_sob_group_val[stream] = 1; 1397 /* only two SOBs are currently in use */ 1398 cprop->curr_sob_group_idx[stream] = 1399 (cprop->curr_sob_group_idx[stream] + 1) & 1400 (HL_RSVD_SOBS - 1); 1401 1402 gaudi_collective_map_sobs(hdev, stream); 1403 1404 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n", 1405 cprop->curr_sob_group_idx[stream], stream); 1406 } 1407 1408 mb(); 1409 hl_fence_put(cs->signal_fence); 1410 cs->signal_fence = NULL; 1411 1412 return 0; 1413 } 1414 1415 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size) 1416 { 1417 u32 cacheline_end, additional_commands; 1418 1419 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE); 1420 additional_commands = sizeof(struct packet_msg_prot) * 2; 1421 1422 if (user_cb_size + additional_commands > cacheline_end) 1423 return cacheline_end - user_cb_size + additional_commands; 1424 else 1425 return additional_commands; 1426 } 1427 1428 static int gaudi_collective_wait_create_job(struct hl_device *hdev, 1429 struct hl_ctx *ctx, struct hl_cs *cs, 1430 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id, 1431 u32 encaps_signal_offset) 1432 { 1433 struct hw_queue_properties *hw_queue_prop; 1434 struct hl_cs_counters_atomic *cntr; 1435 struct hl_cs_job *job; 1436 struct hl_cb *cb; 1437 u32 cb_size; 1438 bool patched_cb; 1439 1440 cntr = &hdev->aggregated_cs_counters; 1441 1442 if (mode == HL_COLLECTIVE_MASTER) { 1443 /* CB size of collective master queue contains 1444 * 4 msg short packets for monitor 1 configuration 1445 * 1 fence packet 1446 * 4 msg short packets for monitor 2 configuration 1447 * 1 fence packet 1448 * 2 msg prot packets for completion and MSI 1449 */ 1450 cb_size = sizeof(struct packet_msg_short) * 8 + 1451 sizeof(struct packet_fence) * 2 + 1452 sizeof(struct packet_msg_prot) * 2; 1453 patched_cb = true; 1454 } else { 1455 /* CB size of collective slave queues contains 1456 * 4 msg short packets for monitor configuration 1457 * 1 fence packet 1458 * 1 additional msg short packet for sob signal 1459 */ 1460 cb_size = sizeof(struct packet_msg_short) * 5 + 1461 sizeof(struct packet_fence); 1462 patched_cb = false; 1463 } 1464 1465 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id]; 1466 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); 1467 if (!job) { 1468 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1469 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1470 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1471 return -ENOMEM; 1472 } 1473 1474 /* Allocate internal mapped CB for non patched CBs */ 1475 cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb); 1476 if (!cb) { 1477 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1478 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1479 kfree(job); 1480 return -EFAULT; 1481 } 1482 1483 job->id = 0; 1484 job->cs = cs; 1485 job->user_cb = cb; 1486 atomic_inc(&job->user_cb->cs_cnt); 1487 job->user_cb_size = cb_size; 1488 job->hw_queue_id = queue_id; 1489 1490 /* since its guaranteed to have only one chunk in the collective wait 1491 * cs, we can use this chunk to set the encapsulated signal offset 1492 * in the jobs. 1493 */ 1494 if (cs->encaps_signals) 1495 job->encaps_sig_wait_offset = encaps_signal_offset; 1496 1497 /* 1498 * No need in parsing, user CB is the patched CB. 
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore, and we need to decrement its refcount
	 * as it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* The first job goes to the collective master queue; it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;
The device CPU works with 40-bits addresses, while bit 39 must be set 1704 * to '1' when accessing the host. 1705 * Bits 49:39 of the full host address are saved for a later 1706 * configuration of the HW to perform extension to 50 bits. 1707 * Because there is a single HW register that holds the extension bits, 1708 * these bits must be identical in all allocated range. 1709 */ 1710 1711 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1712 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1713 &dma_addr_arr[i], 1714 GFP_KERNEL | __GFP_ZERO); 1715 if (!virt_addr_arr[i]) { 1716 rc = -ENOMEM; 1717 goto free_dma_mem_arr; 1718 } 1719 1720 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1721 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1722 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1723 break; 1724 } 1725 1726 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1727 dev_err(hdev->dev, 1728 "MSB of CPU accessible DMA memory are not identical in all range\n"); 1729 rc = -EFAULT; 1730 goto free_dma_mem_arr; 1731 } 1732 1733 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1734 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1735 hdev->cpu_pci_msb_addr = 1736 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1737 1738 if (!hdev->asic_prop.fw_security_enabled) 1739 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1740 1741 free_dma_mem_arr: 1742 for (j = 0 ; j < i ; j++) 1743 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1744 dma_addr_arr[j]); 1745 1746 return rc; 1747 } 1748 1749 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1750 { 1751 struct gaudi_device *gaudi = hdev->asic_specific; 1752 struct gaudi_internal_qman_info *q; 1753 u32 i; 1754 1755 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1756 q = &gaudi->internal_qmans[i]; 1757 if (!q->pq_kernel_addr) 1758 continue; 1759 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1760 } 1761 } 1762 1763 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1764 { 1765 struct gaudi_device *gaudi = hdev->asic_specific; 1766 struct gaudi_internal_qman_info *q; 1767 int rc, i; 1768 1769 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1770 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1771 continue; 1772 1773 q = &gaudi->internal_qmans[i]; 1774 1775 switch (i) { 1776 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1777 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1778 break; 1779 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1780 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1781 break; 1782 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1783 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1784 break; 1785 case GAUDI_QUEUE_ID_NIC_0_0 ... 
GAUDI_QUEUE_ID_NIC_9_3: 1786 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1787 break; 1788 default: 1789 dev_err(hdev->dev, "Bad internal queue index %d", i); 1790 rc = -EINVAL; 1791 goto free_internal_qmans_pq_mem; 1792 } 1793 1794 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1795 GFP_KERNEL | __GFP_ZERO); 1796 if (!q->pq_kernel_addr) { 1797 rc = -ENOMEM; 1798 goto free_internal_qmans_pq_mem; 1799 } 1800 } 1801 1802 return 0; 1803 1804 free_internal_qmans_pq_mem: 1805 gaudi_free_internal_qmans_pq_mem(hdev); 1806 return rc; 1807 } 1808 1809 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1810 { 1811 struct asic_fixed_properties *prop = &hdev->asic_prop; 1812 struct pci_mem_region *region; 1813 1814 /* CFG */ 1815 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1816 region->region_base = CFG_BASE; 1817 region->region_size = CFG_SIZE; 1818 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1819 region->bar_size = CFG_BAR_SIZE; 1820 region->bar_id = CFG_BAR_ID; 1821 region->used = 1; 1822 1823 /* SRAM */ 1824 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1825 region->region_base = SRAM_BASE_ADDR; 1826 region->region_size = SRAM_SIZE; 1827 region->offset_in_bar = 0; 1828 region->bar_size = SRAM_BAR_SIZE; 1829 region->bar_id = SRAM_BAR_ID; 1830 region->used = 1; 1831 1832 /* DRAM */ 1833 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1834 region->region_base = DRAM_PHYS_BASE; 1835 region->region_size = hdev->asic_prop.dram_size; 1836 region->offset_in_bar = 0; 1837 region->bar_size = prop->dram_pci_bar_size; 1838 region->bar_id = HBM_BAR_ID; 1839 region->used = 1; 1840 1841 /* SP SRAM */ 1842 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1843 region->region_base = PSOC_SCRATCHPAD_ADDR; 1844 region->region_size = PSOC_SCRATCHPAD_SIZE; 1845 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1846 region->bar_size = CFG_BAR_SIZE; 1847 region->bar_id = CFG_BAR_ID; 1848 region->used = 1; 1849 } 1850 1851 static int gaudi_sw_init(struct hl_device *hdev) 1852 { 1853 struct gaudi_device *gaudi; 1854 u32 i, event_id = 0; 1855 int rc; 1856 1857 /* Allocate device structure */ 1858 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1859 if (!gaudi) 1860 return -ENOMEM; 1861 1862 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1863 if (gaudi_irq_map_table[i].valid) { 1864 if (event_id == GAUDI_EVENT_SIZE) { 1865 dev_err(hdev->dev, 1866 "Event array exceeds the limit of %u events\n", 1867 GAUDI_EVENT_SIZE); 1868 rc = -EINVAL; 1869 goto free_gaudi_device; 1870 } 1871 1872 gaudi->events[event_id++] = 1873 gaudi_irq_map_table[i].fc_id; 1874 } 1875 } 1876 1877 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1878 1879 hdev->asic_specific = gaudi; 1880 1881 /* Create DMA pool for small allocations */ 1882 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1883 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1884 if (!hdev->dma_pool) { 1885 dev_err(hdev->dev, "failed to create DMA pool\n"); 1886 rc = -ENOMEM; 1887 goto free_gaudi_device; 1888 } 1889 1890 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1891 if (rc) 1892 goto free_dma_pool; 1893 1894 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1895 if (!hdev->cpu_accessible_dma_pool) { 1896 dev_err(hdev->dev, 1897 "Failed to create CPU accessible DMA pool\n"); 1898 rc = -ENOMEM; 1899 goto free_cpu_dma_mem; 1900 } 1901 1902 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1903 (uintptr_t) hdev->cpu_accessible_dma_mem, 1904 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1905 if 
(rc) { 1906 dev_err(hdev->dev, 1907 "Failed to add memory to CPU accessible DMA pool\n"); 1908 rc = -EFAULT; 1909 goto free_cpu_accessible_dma_pool; 1910 } 1911 1912 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1913 if (rc) 1914 goto free_cpu_accessible_dma_pool; 1915 1916 spin_lock_init(&gaudi->hw_queues_lock); 1917 1918 hdev->supports_sync_stream = true; 1919 hdev->supports_coresight = true; 1920 hdev->supports_staged_submission = true; 1921 hdev->supports_wait_for_multi_cs = true; 1922 1923 hdev->asic_funcs->set_pci_memory_regions(hdev); 1924 hdev->stream_master_qid_arr = 1925 hdev->asic_funcs->get_stream_master_qid_arr(); 1926 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1927 1928 return 0; 1929 1930 free_cpu_accessible_dma_pool: 1931 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1932 free_cpu_dma_mem: 1933 if (!hdev->asic_prop.fw_security_enabled) 1934 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1935 hdev->cpu_pci_msb_addr); 1936 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1937 hdev->cpu_accessible_dma_address); 1938 free_dma_pool: 1939 dma_pool_destroy(hdev->dma_pool); 1940 free_gaudi_device: 1941 kfree(gaudi); 1942 return rc; 1943 } 1944 1945 static int gaudi_sw_fini(struct hl_device *hdev) 1946 { 1947 struct gaudi_device *gaudi = hdev->asic_specific; 1948 1949 gaudi_free_internal_qmans_pq_mem(hdev); 1950 1951 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1952 1953 if (!hdev->asic_prop.fw_security_enabled) 1954 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1955 hdev->cpu_pci_msb_addr); 1956 1957 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1958 hdev->cpu_accessible_dma_address); 1959 1960 dma_pool_destroy(hdev->dma_pool); 1961 1962 kfree(gaudi); 1963 1964 return 0; 1965 } 1966 1967 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1968 { 1969 struct hl_device *hdev = arg; 1970 int i; 1971 1972 if (hdev->disabled) 1973 return IRQ_HANDLED; 1974 1975 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1976 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1977 1978 hl_irq_handler_eq(irq, &hdev->event_queue); 1979 1980 return IRQ_HANDLED; 1981 } 1982 1983 /* 1984 * For backward compatibility, new MSI interrupts should be set after the 1985 * existing CPU and NIC interrupts. 1986 */ 1987 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1988 bool cpu_eq) 1989 { 1990 int msi_vec; 1991 1992 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1993 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1994 GAUDI_EVENT_QUEUE_MSI_IDX); 1995 1996 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1997 (nr + NIC_NUMBER_OF_ENGINES + 1); 1998 1999 return pci_irq_vector(hdev->pdev, msi_vec); 2000 } 2001 2002 static int gaudi_enable_msi_single(struct hl_device *hdev) 2003 { 2004 int rc, irq; 2005 2006 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2007 2008 irq = gaudi_pci_irq_vector(hdev, 0, false); 2009 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2010 "gaudi single msi", hdev); 2011 if (rc) 2012 dev_err(hdev->dev, 2013 "Failed to request single MSI IRQ\n"); 2014 2015 return rc; 2016 } 2017 2018 static int gaudi_enable_msi(struct hl_device *hdev) 2019 { 2020 struct gaudi_device *gaudi = hdev->asic_specific; 2021 int rc; 2022 2023 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2024 return 0; 2025 2026 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2027 if (rc < 0) { 2028 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2029 return rc; 2030 } 2031 2032 rc = gaudi_enable_msi_single(hdev); 2033 if (rc) 2034 goto free_pci_irq_vectors; 2035 2036 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2037 2038 return 0; 2039 2040 free_pci_irq_vectors: 2041 pci_free_irq_vectors(hdev->pdev); 2042 return rc; 2043 } 2044 2045 static void gaudi_sync_irqs(struct hl_device *hdev) 2046 { 2047 struct gaudi_device *gaudi = hdev->asic_specific; 2048 2049 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2050 return; 2051 2052 /* Wait for all pending IRQs to be finished */ 2053 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2054 } 2055 2056 static void gaudi_disable_msi(struct hl_device *hdev) 2057 { 2058 struct gaudi_device *gaudi = hdev->asic_specific; 2059 2060 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2061 return; 2062 2063 gaudi_sync_irqs(hdev); 2064 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2065 pci_free_irq_vectors(hdev->pdev); 2066 2067 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2068 } 2069 2070 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2071 { 2072 struct gaudi_device *gaudi = hdev->asic_specific; 2073 2074 if (hdev->asic_prop.fw_security_enabled) 2075 return; 2076 2077 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2078 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2079 return; 2080 2081 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2082 return; 2083 2084 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2085 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2086 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2087 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2088 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2089 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2090 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2091 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2092 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2093 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2094 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2095 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2096 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2097 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2098 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2099 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2100 2101 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2102 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2103 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2104 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2105 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2106 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2107 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2108 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2109 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2110 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2111 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2112 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2113 
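/*
 * The NIF and SIF router instances are each programmed with the same
 * scrambler-enable value, one register at a time. A minimal sketch of the
 * NIF writes expressed as a loop, assuming (not verified here) that the
 * router instances are laid out with a uniform register stride:
 */
#if 0
	{
		u32 rtr_stride = mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN -
				mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN;
		int rtr;

		/* Illustrative only - not part of the init flow */
		for (rtr = 0 ; rtr < 8 ; rtr++)
			WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN + rtr * rtr_stride,
				1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
	}
#endif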
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2114 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2115 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2116 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2117 2118 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2119 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2120 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2121 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2122 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2123 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2124 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2125 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2126 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2127 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2128 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2129 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2130 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2131 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2132 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2133 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2134 2135 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2136 } 2137 2138 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2139 { 2140 struct gaudi_device *gaudi = hdev->asic_specific; 2141 2142 if (hdev->asic_prop.fw_security_enabled) 2143 return; 2144 2145 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2146 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2147 return; 2148 2149 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2150 return; 2151 2152 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2153 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2154 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2155 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2156 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2157 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2158 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2159 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2160 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2161 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2162 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2163 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2164 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2165 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2166 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2167 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2168 2169 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2170 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2171 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2172 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2173 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2174 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2175 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2176 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2177 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2178 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2179 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2180 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2181 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2182 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2183 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2184 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2185 2186 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2187 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2188 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2189 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2190 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2191 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2192 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2193 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2194 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2195 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2196 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2197 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2198 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2199 1 << 
DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2200 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2201 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2202 2203 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2204 } 2205 2206 static void gaudi_init_e2e(struct hl_device *hdev) 2207 { 2208 if (hdev->asic_prop.fw_security_enabled) 2209 return; 2210 2211 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2212 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2213 return; 2214 2215 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2216 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2217 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2218 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2219 2220 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2221 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2222 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2223 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2224 2225 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2226 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2227 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2228 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2229 2230 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2231 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2232 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2233 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2234 2235 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2236 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2237 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2238 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2239 2240 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2241 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2242 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2243 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2244 2245 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2246 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2247 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2248 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2249 2250 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2251 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2252 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2253 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2254 2255 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2256 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2257 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2258 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2259 2260 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2261 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2262 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2263 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2264 2265 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2266 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2267 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2268 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2269 2270 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2271 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2272 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2273 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2274 2275 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2276 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2277 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2278 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2279 2280 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2281 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2282 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2283 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2284 2285 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2286 
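/*
 * The HBM E2E credit sizes in this function are written as a byte count
 * shifted right by 3, which suggests (an inference from the code, not a
 * documented fact) that the register field is expressed in 8-byte units,
 * while the PCI sizes are written directly. A sketch making the unit
 * conversion explicit for one of the writes above:
 */
#if 0
	{
		u32 hbm_wr_bytes = 247;	/* same value as the SIF CTRL_0 write above */

		WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, hbm_wr_bytes >> 3);
	}
#endif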
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2287 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2288 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2289 2290 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2291 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2292 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2293 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2294 2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2296 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2297 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2298 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2299 2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2301 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2302 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2303 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2304 2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2306 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2307 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2308 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2309 2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2311 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2312 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2313 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2314 2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2316 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2317 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2318 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2319 2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2321 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2322 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2323 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2324 2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2326 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2327 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2328 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2329 2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2331 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2332 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2333 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2334 2335 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2336 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2337 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2338 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2339 2340 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2341 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2342 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2343 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2344 2345 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2346 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2347 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2348 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2349 2350 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2351 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2352 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2353 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2354 2355 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2356 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2357 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2358 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2359 2360 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2361 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2362 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2363 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2364 2365 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2366 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2367 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2368 1 << 
IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2369 2370 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2371 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2372 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2373 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2374 2375 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2376 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2377 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2378 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2379 2380 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2381 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2382 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2383 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2384 2385 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2386 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2387 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2388 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2389 2390 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2391 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2392 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2393 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2394 2395 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2396 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2397 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2398 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2399 2400 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2401 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2402 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2403 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2404 2405 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2406 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2407 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2408 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2409 2410 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2411 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2412 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2413 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2414 2415 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2416 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2417 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2418 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2419 2420 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2421 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2422 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2423 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2424 2425 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2426 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2427 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2428 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2429 2430 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2431 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2432 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2433 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2434 2435 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2436 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2437 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2438 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2439 2440 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2441 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2442 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2443 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2444 2445 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2446 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2447 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2448 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2449 2450 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2451 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2452 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2453 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2454 } 2455 2456 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2457 { 2458 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2459 2460 if (hdev->asic_prop.fw_security_enabled) 2461 return; 2462 2463 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2464 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2465 return; 2466 2467 hbm0_wr = 0x33333333; 2468 hbm0_rd = 0x77777777; 2469 
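/*
 * A sketch of one possible reading of these credit constants (an assumption,
 * not taken from documentation): each 32-bit value packs a 4-bit credit
 * count per initiator, replicated across all eight nibbles, e.g.
 * 0x33333333 = 3 credits each and 0xDDDDDDDD = 13 credits each.
 */
#if 0
	{
		u32 cred = 3, packed = 0;
		int nibble;

		for (nibble = 0 ; nibble < 8 ; nibble++)
			packed |= (cred & 0xF) << (nibble * 4);

		/* packed == 0x33333333, i.e. the hbm0_wr value above */
	}
#endif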
hbm1_wr = 0x55555555; 2470 hbm1_rd = 0xDDDDDDDD; 2471 2472 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2473 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2474 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2475 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2476 2477 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2478 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2479 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2480 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2481 2482 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2483 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2484 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2485 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2486 2487 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2488 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2489 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2490 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2491 2492 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2493 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2494 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2495 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2496 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2497 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2498 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2499 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2500 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2501 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2502 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2503 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2504 2505 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2506 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2507 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2508 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2509 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2510 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2511 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2512 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2513 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2514 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2515 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2516 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2517 } 2518 2519 static void gaudi_init_golden_registers(struct hl_device *hdev) 2520 { 2521 u32 tpc_offset; 2522 int tpc_id, i; 2523 2524 gaudi_init_e2e(hdev); 2525 gaudi_init_hbm_cred(hdev); 2526 2527 for (tpc_id = 0, tpc_offset = 0; 2528 tpc_id < TPC_NUMBER_OF_ENGINES; 2529 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2530 /* Mask all arithmetic interrupts from TPC */ 2531 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2532 /* Set 16 cache lines */ 2533 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2534 ICACHE_FETCH_LINE_NUM, 2); 2535 } 2536 2537 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2538 for (i = 0 ; i < 128 ; i += 8) 2539 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2540 2541 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2542 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2543 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2544 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2545 } 2546 2547 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2548 int qman_id, dma_addr_t qman_pq_addr) 2549 { 2550 struct cpu_dyn_regs *dyn_regs = 2551 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2552 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2553 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2554 u32 q_off, dma_qm_offset; 2555 u32 dma_qm_err_cfg, irq_handler_offset; 2556 2557 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2558 2559 mtr_base_en_lo = 
lower_32_bits(CFG_BASE + 2560 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2561 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2562 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2563 so_base_en_lo = lower_32_bits(CFG_BASE + 2564 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2565 so_base_en_hi = upper_32_bits(CFG_BASE + 2566 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2567 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2568 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2569 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2570 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2571 so_base_ws_lo = lower_32_bits(CFG_BASE + 2572 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2573 so_base_ws_hi = upper_32_bits(CFG_BASE + 2574 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2575 2576 q_off = dma_qm_offset + qman_id * 4; 2577 2578 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2579 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2580 2581 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2582 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2583 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2584 2585 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2586 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2587 QMAN_LDMA_SRC_OFFSET); 2588 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2589 QMAN_LDMA_DST_OFFSET); 2590 2591 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2592 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2593 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2594 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2595 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2596 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2597 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2598 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2599 2600 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2601 2602 /* The following configuration is needed only once per QMAN */ 2603 if (qman_id == 0) { 2604 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2605 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2606 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2607 2608 /* Configure RAZWI IRQ */ 2609 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2610 if (hdev->stop_on_err) 2611 dma_qm_err_cfg |= 2612 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2613 2614 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2615 2616 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2617 lower_32_bits(CFG_BASE + irq_handler_offset)); 2618 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2619 upper_32_bits(CFG_BASE + irq_handler_offset)); 2620 2621 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2622 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2623 dma_id); 2624 2625 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2626 QM_ARB_ERR_MSG_EN_MASK); 2627 2628 /* Set timeout to maximum */ 2629 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2630 2631 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2632 QMAN_EXTERNAL_MAKE_TRUSTED); 2633 2634 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2635 } 2636 } 2637 2638 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2639 { 2640 struct cpu_dyn_regs *dyn_regs = 2641 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2642 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2643 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2644 u32 irq_handler_offset; 2645 2646 /* Set to maximum possible according to physical size */ 2647 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2648 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2649 2650 /* WA for H/W bug H3-2116 */ 2651 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2652 2653 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2654 if (hdev->stop_on_err) 2655 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2656 2657 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2658 2659 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2660 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2661 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2662 2663 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2664 lower_32_bits(CFG_BASE + irq_handler_offset)); 2665 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2666 upper_32_bits(CFG_BASE + irq_handler_offset)); 2667 2668 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2669 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2670 WREG32(mmDMA0_CORE_PROT + dma_offset, 2671 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2672 /* If the channel is secured, it should be in MMU bypass mode */ 2673 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2674 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2675 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2676 } 2677 2678 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2679 u32 enable_mask) 2680 { 2681 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2682 2683 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2684 } 2685 2686 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2687 { 2688 struct gaudi_device *gaudi = hdev->asic_specific; 2689 struct hl_hw_queue *q; 2690 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2691 2692 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2693 return; 2694 2695 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2696 dma_id = gaudi_dma_assignment[i]; 2697 /* 2698 * For queues after the CPU Q need to add 1 to get the correct 2699 * queue. 
In addition, need to add the CPU EQ and NIC IRQs in 2700 * order to get the correct MSI register. 2701 */ 2702 if (dma_id > 1) { 2703 cpu_skip = 1; 2704 nic_skip = NIC_NUMBER_OF_ENGINES; 2705 } else { 2706 cpu_skip = 0; 2707 nic_skip = 0; 2708 } 2709 2710 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2711 q_idx = 4 * dma_id + j + cpu_skip; 2712 q = &hdev->kernel_queues[q_idx]; 2713 q->cq_id = cq_id++; 2714 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2715 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2716 q->bus_address); 2717 } 2718 2719 gaudi_init_dma_core(hdev, dma_id); 2720 2721 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2722 } 2723 2724 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2725 } 2726 2727 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2728 int qman_id, u64 qman_base_addr) 2729 { 2730 struct cpu_dyn_regs *dyn_regs = 2731 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2732 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2733 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2734 u32 dma_qm_err_cfg, irq_handler_offset; 2735 u32 q_off, dma_qm_offset; 2736 2737 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2738 2739 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2741 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2743 so_base_en_lo = lower_32_bits(CFG_BASE + 2744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2745 so_base_en_hi = upper_32_bits(CFG_BASE + 2746 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2747 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2749 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2751 so_base_ws_lo = lower_32_bits(CFG_BASE + 2752 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2753 so_base_ws_hi = upper_32_bits(CFG_BASE + 2754 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2755 2756 q_off = dma_qm_offset + qman_id * 4; 2757 2758 if (qman_id < 4) { 2759 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2760 lower_32_bits(qman_base_addr)); 2761 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2762 upper_32_bits(qman_base_addr)); 2763 2764 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2765 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2766 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2767 2768 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2769 QMAN_CPDMA_SIZE_OFFSET); 2770 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2771 QMAN_CPDMA_SRC_OFFSET); 2772 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2773 QMAN_CPDMA_DST_OFFSET); 2774 } else { 2775 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2776 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2777 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2778 2779 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2780 QMAN_LDMA_SIZE_OFFSET); 2781 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2782 QMAN_LDMA_SRC_OFFSET); 2783 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2784 QMAN_LDMA_DST_OFFSET); 2785 2786 /* Configure RAZWI IRQ */ 2787 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2788 if (hdev->stop_on_err) 2789 dma_qm_err_cfg |= 2790 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2791 2792 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2793 2794 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2795 lower_32_bits(CFG_BASE + irq_handler_offset)); 2796 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2797 upper_32_bits(CFG_BASE + irq_handler_offset)); 2798 2799 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2800 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2801 dma_id); 2802 2803 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2804 QM_ARB_ERR_MSG_EN_MASK); 2805 2806 /* Set timeout to maximum */ 2807 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2808 2809 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2810 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2811 QMAN_INTERNAL_MAKE_TRUSTED); 2812 } 2813 2814 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2815 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2816 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2817 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2818 2819 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2820 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2821 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2822 mtr_base_ws_lo); 2823 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2824 mtr_base_ws_hi); 2825 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2826 so_base_ws_lo); 2827 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2828 so_base_ws_hi); 2829 } 2830 } 2831 2832 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2833 { 2834 struct gaudi_device *gaudi = hdev->asic_specific; 2835 struct gaudi_internal_qman_info *q; 2836 u64 qman_base_addr; 2837 int i, j, dma_id, internal_q_index; 2838 2839 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2840 return; 2841 2842 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2843 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2844 2845 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2846 /* 2847 * Add the CPU queue in order to get the correct queue 2848 * number as all internal queue are placed after it 2849 */ 2850 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2851 2852 q = &gaudi->internal_qmans[internal_q_index]; 2853 qman_base_addr = (u64) q->pq_dma_addr; 2854 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 2855 qman_base_addr); 2856 } 2857 2858 /* Initializing lower CP for HBM DMA QMAN */ 2859 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2860 2861 gaudi_init_dma_core(hdev, dma_id); 2862 2863 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2864 } 2865 2866 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2867 } 2868 2869 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2870 int qman_id, u64 qman_base_addr) 2871 { 2872 struct cpu_dyn_regs *dyn_regs = 2873 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2874 u32 mtr_base_lo, mtr_base_hi; 2875 u32 so_base_lo, so_base_hi; 2876 u32 irq_handler_offset; 2877 u32 q_off, mme_id; 2878 
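/*
 * Note on the register addressing used below (shared with the DMA, TPC and
 * NIC variants of this function): the per-stream queue registers
 * (PQ_BASE_LO_0..3, PQ_PI_0..3 and so on) are consecutive 32-bit registers,
 * so stream 'qman_id' of a QMAN block is reached by adding qman_id * 4 to
 * the block offset; qman_id 4 selects the lower CP, which is configured
 * without a PQ. A minimal sketch, e.g. clearing PQ_PI of MME0 stream 2
 * (MME0's block offset is 0, as used by gaudi_init_mme_qmans()):
 */
#if 0
	WREG32(mmMME0_QM_PQ_PI_0 + 0 /* MME0 block offset */ + 2 * 4, 0);
#endif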
u32 mme_qm_err_cfg; 2879 2880 mtr_base_lo = lower_32_bits(CFG_BASE + 2881 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2882 mtr_base_hi = upper_32_bits(CFG_BASE + 2883 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2884 so_base_lo = lower_32_bits(CFG_BASE + 2885 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2886 so_base_hi = upper_32_bits(CFG_BASE + 2887 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2888 2889 q_off = mme_offset + qman_id * 4; 2890 2891 if (qman_id < 4) { 2892 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2893 lower_32_bits(qman_base_addr)); 2894 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2895 upper_32_bits(qman_base_addr)); 2896 2897 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2898 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2899 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2900 2901 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2902 QMAN_CPDMA_SIZE_OFFSET); 2903 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2904 QMAN_CPDMA_SRC_OFFSET); 2905 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2906 QMAN_CPDMA_DST_OFFSET); 2907 } else { 2908 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2909 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2910 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2911 2912 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2913 QMAN_LDMA_SIZE_OFFSET); 2914 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2915 QMAN_LDMA_SRC_OFFSET); 2916 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2917 QMAN_LDMA_DST_OFFSET); 2918 2919 /* Configure RAZWI IRQ */ 2920 mme_id = mme_offset / 2921 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2922 2923 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2924 if (hdev->stop_on_err) 2925 mme_qm_err_cfg |= 2926 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2927 2928 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2929 2930 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2931 lower_32_bits(CFG_BASE + irq_handler_offset)); 2932 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2933 upper_32_bits(CFG_BASE + irq_handler_offset)); 2934 2935 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2936 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2937 mme_id); 2938 2939 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2940 QM_ARB_ERR_MSG_EN_MASK); 2941 2942 /* Set timeout to maximum */ 2943 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 2944 2945 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 2946 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 2947 QMAN_INTERNAL_MAKE_TRUSTED); 2948 } 2949 2950 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2951 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2952 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2953 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2954 } 2955 2956 static void gaudi_init_mme_qmans(struct hl_device *hdev) 2957 { 2958 struct gaudi_device *gaudi = hdev->asic_specific; 2959 struct gaudi_internal_qman_info *q; 2960 u64 qman_base_addr; 2961 u32 mme_offset; 2962 int i, internal_q_index; 2963 2964 if (gaudi->hw_cap_initialized & HW_CAP_MME) 2965 return; 2966 2967 /* 2968 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 2969 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 2970 */ 2971 2972 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2973 2974 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 2975 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 2976 q = &gaudi->internal_qmans[internal_q_index]; 2977 qman_base_addr = (u64) q->pq_dma_addr; 
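/*
 * Per the mapping comment above: iterations 0-3 of this loop program the
 * four streams of the north-west MME (mmMME2_QM_*), after which mme_offset
 * is reset to 0 so iterations 4-7 program the south-west MME (mmMME0_QM_*).
 * A sketch of the equivalent block selection for a given iteration 'i',
 * matching the call below:
 */
#if 0
	u32 off = (i < 4) ? (mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) : 0;
#endif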
2978 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 2979 qman_base_addr); 2980 if (i == 3) 2981 mme_offset = 0; 2982 } 2983 2984 /* Initializing lower CP for MME QMANs */ 2985 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2986 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 2987 gaudi_init_mme_qman(hdev, 0, 4, 0); 2988 2989 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2990 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2991 2992 gaudi->hw_cap_initialized |= HW_CAP_MME; 2993 } 2994 2995 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 2996 int qman_id, u64 qman_base_addr) 2997 { 2998 struct cpu_dyn_regs *dyn_regs = 2999 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3000 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3001 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3002 u32 tpc_qm_err_cfg, irq_handler_offset; 3003 u32 q_off, tpc_id; 3004 3005 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3007 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3009 so_base_en_lo = lower_32_bits(CFG_BASE + 3010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3011 so_base_en_hi = upper_32_bits(CFG_BASE + 3012 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3013 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3014 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3015 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3016 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3017 so_base_ws_lo = lower_32_bits(CFG_BASE + 3018 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3019 so_base_ws_hi = upper_32_bits(CFG_BASE + 3020 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3021 3022 q_off = tpc_offset + qman_id * 4; 3023 3024 tpc_id = tpc_offset / 3025 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3026 3027 if (qman_id < 4) { 3028 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3029 lower_32_bits(qman_base_addr)); 3030 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3031 upper_32_bits(qman_base_addr)); 3032 3033 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3034 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3035 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3036 3037 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3038 QMAN_CPDMA_SIZE_OFFSET); 3039 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3040 QMAN_CPDMA_SRC_OFFSET); 3041 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3042 QMAN_CPDMA_DST_OFFSET); 3043 } else { 3044 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3045 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3046 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3047 3048 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3049 QMAN_LDMA_SIZE_OFFSET); 3050 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3051 QMAN_LDMA_SRC_OFFSET); 3052 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3053 QMAN_LDMA_DST_OFFSET); 3054 3055 /* Configure RAZWI IRQ */ 3056 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3057 if (hdev->stop_on_err) 3058 tpc_qm_err_cfg |= 3059 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3060 3061 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3062 3063 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3064 lower_32_bits(CFG_BASE + irq_handler_offset)); 3065 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3066 upper_32_bits(CFG_BASE + irq_handler_offset)); 3067 3068 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3069 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3070 tpc_id); 3071 3072 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3073 QM_ARB_ERR_MSG_EN_MASK); 3074 3075 /* Set timeout to maximum */ 3076 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3077 3078 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3079 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3080 QMAN_INTERNAL_MAKE_TRUSTED); 3081 } 3082 3083 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3084 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3085 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3086 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3087 3088 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3089 if (tpc_id == 6) { 3090 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3091 mtr_base_ws_lo); 3092 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3093 mtr_base_ws_hi); 3094 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3095 so_base_ws_lo); 3096 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3097 so_base_ws_hi); 3098 } 3099 } 3100 3101 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3102 { 3103 struct gaudi_device *gaudi = hdev->asic_specific; 3104 struct gaudi_internal_qman_info *q; 3105 u64 qman_base_addr; 3106 u32 so_base_hi, tpc_offset = 0; 3107 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3108 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3109 int i, tpc_id, internal_q_index; 3110 3111 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3112 return; 3113 3114 so_base_hi = upper_32_bits(CFG_BASE + 3115 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3116 3117 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3118 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3119 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3120 tpc_id * QMAN_STREAMS + i; 3121 q = &gaudi->internal_qmans[internal_q_index]; 3122 qman_base_addr = (u64) q->pq_dma_addr; 3123 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3124 qman_base_addr); 3125 3126 if (i == 3) { 3127 /* Initializing lower CP for TPC QMAN */ 3128 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3129 3130 /* Enable the QMAN and TPC channel */ 3131 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3132 QMAN_TPC_ENABLE); 3133 } 3134 } 3135 3136 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3137 so_base_hi); 3138 3139 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3140 3141 gaudi->hw_cap_initialized |= 3142 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3143 } 3144 } 3145 3146 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3147 int qman_id, u64 qman_base_addr, int nic_id) 3148 { 3149 
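/*
 * This helper programs a single stream of a single NIC QMAN. The caller,
 * gaudi_init_nic_qmans(), walks hdev->nic_ports_mask, skips ports that are
 * not enabled and records each initialized port as a capability bit
 * (HW_CAP_NIC_SHIFT + nic_id) in hw_cap_initialized. Other paths, such as
 * the collective-wait job creation, test that bit to skip the queues of
 * disabled ports. A minimal sketch of such a test:
 */
#if 0
	{
		struct gaudi_device *gaudi = hdev->asic_specific;
		bool nic_up = !!(gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_id));
	}
#endif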
struct cpu_dyn_regs *dyn_regs = 3150 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3151 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3152 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3153 u32 nic_qm_err_cfg, irq_handler_offset; 3154 u32 q_off; 3155 3156 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3157 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3158 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3159 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3160 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3161 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3162 so_base_en_hi = upper_32_bits(CFG_BASE + 3163 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3164 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3165 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3166 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3167 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3168 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3169 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3170 so_base_ws_hi = upper_32_bits(CFG_BASE + 3171 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3172 3173 q_off = nic_offset + qman_id * 4; 3174 3175 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3176 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3177 3178 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3179 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3180 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3181 3182 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3183 QMAN_LDMA_SIZE_OFFSET); 3184 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3185 QMAN_LDMA_SRC_OFFSET); 3186 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3187 QMAN_LDMA_DST_OFFSET); 3188 3189 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3190 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3191 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3192 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3193 3194 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3195 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3196 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3197 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3198 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3199 3200 if (qman_id == 0) { 3201 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3202 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3203 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3204 3205 /* Configure RAZWI IRQ */ 3206 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3207 if (hdev->stop_on_err) 3208 nic_qm_err_cfg |= 3209 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3210 3211 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3212 3213 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3214 lower_32_bits(CFG_BASE + irq_handler_offset)); 3215 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3216 upper_32_bits(CFG_BASE + irq_handler_offset)); 3217 3218 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3219 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3220 nic_id); 3221 3222 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3223 QM_ARB_ERR_MSG_EN_MASK); 3224 3225 /* Set timeout to maximum */ 3226 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3227 3228 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3229 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3230 QMAN_INTERNAL_MAKE_TRUSTED); 3231 } 3232 } 3233 3234 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3235 { 3236 struct gaudi_device *gaudi = hdev->asic_specific; 3237 struct gaudi_internal_qman_info *q; 3238 u64 qman_base_addr; 3239 u32 nic_offset = 0; 3240 u32 nic_delta_between_qmans = 3241 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3242 u32 nic_delta_between_nics = 3243 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3244 int i, nic_id, internal_q_index; 3245 3246 if (!hdev->nic_ports_mask) 3247 return; 3248 3249 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3250 return; 3251 3252 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3253 3254 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3255 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3256 nic_offset += nic_delta_between_qmans; 3257 if (nic_id & 1) { 3258 nic_offset -= (nic_delta_between_qmans * 2); 3259 nic_offset += nic_delta_between_nics; 3260 } 3261 continue; 3262 } 3263 3264 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3265 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3266 nic_id * QMAN_STREAMS + i; 3267 q = &gaudi->internal_qmans[internal_q_index]; 3268 qman_base_addr = (u64) q->pq_dma_addr; 3269 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3270 qman_base_addr, nic_id); 3271 } 3272 3273 /* Enable the QMAN */ 3274 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3275 3276 nic_offset += nic_delta_between_qmans; 3277 if (nic_id & 1) { 3278 nic_offset -= (nic_delta_between_qmans * 2); 3279 nic_offset += nic_delta_between_nics; 3280 } 3281 3282 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3283 } 3284 } 3285 3286 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3287 { 3288 struct gaudi_device *gaudi = hdev->asic_specific; 3289 3290 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3291 return; 3292 3293 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3294 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3295 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3296 } 3297 3298 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3299 { 3300 struct gaudi_device *gaudi = hdev->asic_specific; 3301 3302 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3303 return; 3304 3305 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3306 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3307 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3308 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3309 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3310 } 3311 3312 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3313 { 3314 struct gaudi_device *gaudi = hdev->asic_specific; 3315 3316 if 
(!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3317 return; 3318 3319 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3320 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3321 } 3322 3323 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3324 { 3325 struct gaudi_device *gaudi = hdev->asic_specific; 3326 u32 tpc_offset = 0; 3327 int tpc_id; 3328 3329 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3330 return; 3331 3332 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3333 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3334 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3335 } 3336 } 3337 3338 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3339 { 3340 struct gaudi_device *gaudi = hdev->asic_specific; 3341 u32 nic_mask, nic_offset = 0; 3342 u32 nic_delta_between_qmans = 3343 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3344 u32 nic_delta_between_nics = 3345 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3346 int nic_id; 3347 3348 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3349 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3350 3351 if (gaudi->hw_cap_initialized & nic_mask) 3352 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3353 3354 nic_offset += nic_delta_between_qmans; 3355 if (nic_id & 1) { 3356 nic_offset -= (nic_delta_between_qmans * 2); 3357 nic_offset += nic_delta_between_nics; 3358 } 3359 } 3360 } 3361 3362 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3363 { 3364 struct gaudi_device *gaudi = hdev->asic_specific; 3365 3366 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3367 return; 3368 3369 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3370 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3371 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3372 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3373 } 3374 3375 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3376 { 3377 struct gaudi_device *gaudi = hdev->asic_specific; 3378 3379 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3380 return; 3381 3382 /* Stop CPs of HBM DMA QMANs */ 3383 3384 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3385 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3386 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3387 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3388 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3389 } 3390 3391 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3392 { 3393 struct gaudi_device *gaudi = hdev->asic_specific; 3394 3395 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3396 return; 3397 3398 /* Stop CPs of MME QMANs */ 3399 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3400 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3401 } 3402 3403 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3404 { 3405 struct gaudi_device *gaudi = hdev->asic_specific; 3406 3407 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3408 return; 3409 3410 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3411 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3412 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3413 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3414 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3415 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3416 
WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3417 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3418 } 3419 3420 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3421 { 3422 struct gaudi_device *gaudi = hdev->asic_specific; 3423 3424 /* Stop upper CPs of QMANs */ 3425 3426 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3427 WREG32(mmNIC0_QM0_GLBL_CFG1, 3428 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3429 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3430 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3431 3432 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3433 WREG32(mmNIC0_QM1_GLBL_CFG1, 3434 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3435 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3436 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3437 3438 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3439 WREG32(mmNIC1_QM0_GLBL_CFG1, 3440 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3441 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3442 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3443 3444 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3445 WREG32(mmNIC1_QM1_GLBL_CFG1, 3446 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3447 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3448 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3449 3450 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3451 WREG32(mmNIC2_QM0_GLBL_CFG1, 3452 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3453 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3454 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3455 3456 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3457 WREG32(mmNIC2_QM1_GLBL_CFG1, 3458 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3459 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3460 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3461 3462 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3463 WREG32(mmNIC3_QM0_GLBL_CFG1, 3464 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3465 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3466 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3467 3468 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3469 WREG32(mmNIC3_QM1_GLBL_CFG1, 3470 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3471 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3472 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3473 3474 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3475 WREG32(mmNIC4_QM0_GLBL_CFG1, 3476 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3477 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3478 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3479 3480 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3481 WREG32(mmNIC4_QM1_GLBL_CFG1, 3482 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3483 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3484 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3485 } 3486 3487 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3488 { 3489 struct gaudi_device *gaudi = hdev->asic_specific; 3490 3491 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3492 return; 3493 3494 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3495 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3496 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3497 } 3498 3499 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3500 { 3501 struct gaudi_device *gaudi = hdev->asic_specific; 3502 3503 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3504 return; 3505 3506 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3507 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3508 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3509 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3510 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3511 } 3512 3513 static void gaudi_mme_stall(struct hl_device *hdev) 3514 { 3515 struct gaudi_device *gaudi = hdev->asic_specific; 3516 3517 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3518 return; 3519 
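	/*
	 * Note: the MME QMANs were already stopped by gaudi_stop_mme_qmans()
	 * earlier in the halt sequence (see gaudi_halt_engines() below); here
	 * we only halt the compute pipes themselves via the ACC/SBAB stall
	 * bits.
	 */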
3520 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3521 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3522 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3523 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3524 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3525 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3526 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3527 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3528 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3529 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3530 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3531 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3532 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3533 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3534 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3535 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3536 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3537 } 3538 3539 static void gaudi_tpc_stall(struct hl_device *hdev) 3540 { 3541 struct gaudi_device *gaudi = hdev->asic_specific; 3542 3543 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3544 return; 3545 3546 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3547 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3548 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3549 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3550 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3551 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3552 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3553 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3554 } 3555 3556 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3557 { 3558 u32 qman_offset; 3559 int i; 3560 3561 if (hdev->asic_prop.fw_security_enabled) 3562 return; 3563 3564 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3565 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3566 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3567 3568 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3569 } 3570 3571 WREG32(mmMME0_QM_CGM_CFG, 0); 3572 WREG32(mmMME0_QM_CGM_CFG1, 0); 3573 WREG32(mmMME2_QM_CGM_CFG, 0); 3574 WREG32(mmMME2_QM_CGM_CFG1, 0); 3575 3576 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3577 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3578 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3579 3580 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3581 } 3582 } 3583 3584 static void gaudi_enable_timestamp(struct hl_device *hdev) 3585 { 3586 /* Disable the timestamp counter */ 3587 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3588 3589 /* Zero the lower/upper parts of the 64-bit counter */ 3590 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3591 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3592 3593 /* Enable the counter */ 3594 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3595 } 3596 3597 static void gaudi_disable_timestamp(struct hl_device *hdev) 3598 { 3599 /* Disable the timestamp counter */ 3600 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3601 } 3602 3603 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3604 { 3605 u32 wait_timeout_ms; 3606 3607 if (hdev->pldm) 3608 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3609 else 3610 
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3611 3612 if (fw_reset) 3613 goto skip_engines; 3614 3615 gaudi_stop_nic_qmans(hdev); 3616 gaudi_stop_mme_qmans(hdev); 3617 gaudi_stop_tpc_qmans(hdev); 3618 gaudi_stop_hbm_dma_qmans(hdev); 3619 gaudi_stop_pci_dma_qmans(hdev); 3620 3621 msleep(wait_timeout_ms); 3622 3623 gaudi_pci_dma_stall(hdev); 3624 gaudi_hbm_dma_stall(hdev); 3625 gaudi_tpc_stall(hdev); 3626 gaudi_mme_stall(hdev); 3627 3628 msleep(wait_timeout_ms); 3629 3630 gaudi_disable_nic_qmans(hdev); 3631 gaudi_disable_mme_qmans(hdev); 3632 gaudi_disable_tpc_qmans(hdev); 3633 gaudi_disable_hbm_dma_qmans(hdev); 3634 gaudi_disable_pci_dma_qmans(hdev); 3635 3636 gaudi_disable_timestamp(hdev); 3637 3638 skip_engines: 3639 gaudi_disable_msi(hdev); 3640 } 3641 3642 static int gaudi_mmu_init(struct hl_device *hdev) 3643 { 3644 struct asic_fixed_properties *prop = &hdev->asic_prop; 3645 struct gaudi_device *gaudi = hdev->asic_specific; 3646 u64 hop0_addr; 3647 int rc, i; 3648 3649 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3650 return 0; 3651 3652 for (i = 0 ; i < prop->max_asid ; i++) { 3653 hop0_addr = prop->mmu_pgt_addr + 3654 (i * prop->dmmu.hop_table_size); 3655 3656 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3657 if (rc) { 3658 dev_err(hdev->dev, 3659 "failed to set hop0 addr for asid %d\n", i); 3660 return rc; 3661 } 3662 } 3663 3664 /* init MMU cache manage page */ 3665 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3666 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3667 3668 /* mem cache invalidation */ 3669 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3670 3671 rc = hl_mmu_invalidate_cache(hdev, true, 0); 3672 if (rc) 3673 return rc; 3674 3675 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3676 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3677 3678 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3679 3680 /* 3681 * The H/W expects the first PI after init to be 1. After wraparound 3682 * we'll write 0. 
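	 * (The driver keeps this producer index in gaudi->mmu_cache_inv_pi,
	 * set to 1 right below; the cache-invalidate flow presumably advances
	 * and wraps it from there.)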
3683 */ 3684 gaudi->mmu_cache_inv_pi = 1; 3685 3686 gaudi->hw_cap_initialized |= HW_CAP_MMU; 3687 3688 return 0; 3689 } 3690 3691 static int gaudi_load_firmware_to_device(struct hl_device *hdev) 3692 { 3693 void __iomem *dst; 3694 3695 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 3696 3697 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0); 3698 } 3699 3700 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 3701 { 3702 void __iomem *dst; 3703 3704 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 3705 3706 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); 3707 } 3708 3709 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev) 3710 { 3711 struct dynamic_fw_load_mgr *dynamic_loader; 3712 struct cpu_dyn_regs *dyn_regs; 3713 3714 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3715 3716 /* 3717 * here we update initial values for few specific dynamic regs (as 3718 * before reading the first descriptor from FW those value has to be 3719 * hard-coded) in later stages of the protocol those values will be 3720 * updated automatically by reading the FW descriptor so data there 3721 * will always be up-to-date 3722 */ 3723 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3724 dyn_regs->kmd_msg_to_cpu = 3725 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3726 dyn_regs->cpu_cmd_status_to_host = 3727 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3728 3729 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC; 3730 } 3731 3732 static void gaudi_init_static_firmware_loader(struct hl_device *hdev) 3733 { 3734 struct static_fw_load_mgr *static_loader; 3735 3736 static_loader = &hdev->fw_loader.static_loader; 3737 3738 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3739 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3740 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3741 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3742 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3743 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3744 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3745 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3746 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3747 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3748 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3749 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3750 static_loader->cpu_reset_wait_msec = hdev->pldm ? 
3751 GAUDI_PLDM_RESET_WAIT_MSEC : 3752 GAUDI_CPU_RESET_WAIT_MSEC; 3753 } 3754 3755 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3756 { 3757 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3758 3759 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3760 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3761 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3762 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3763 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3764 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3765 } 3766 3767 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3768 { 3769 struct asic_fixed_properties *prop = &hdev->asic_prop; 3770 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3771 3772 /* fill common fields */ 3773 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3774 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3775 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3776 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3777 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3778 fw_loader->skip_bmc = !hdev->bmc_enable; 3779 fw_loader->sram_bar_id = SRAM_BAR_ID; 3780 fw_loader->dram_bar_id = HBM_BAR_ID; 3781 3782 if (prop->dynamic_fw_load) 3783 gaudi_init_dynamic_firmware_loader(hdev); 3784 else 3785 gaudi_init_static_firmware_loader(hdev); 3786 } 3787 3788 static int gaudi_init_cpu(struct hl_device *hdev) 3789 { 3790 struct gaudi_device *gaudi = hdev->asic_specific; 3791 int rc; 3792 3793 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3794 return 0; 3795 3796 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3797 return 0; 3798 3799 /* 3800 * The device CPU works with 40 bits addresses. 3801 * This register sets the extension to 50 bits. 
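	 * (cpu_pci_msb_addr presumably carries the host-address bits above
	 * bit 39; when F/W security is enabled the F/W is expected to program
	 * this register itself, hence the check below.)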
3802 */ 3803 if (!hdev->asic_prop.fw_security_enabled) 3804 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3805 3806 rc = hl_fw_init_cpu(hdev); 3807 3808 if (rc) 3809 return rc; 3810 3811 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3812 3813 return 0; 3814 } 3815 3816 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3817 { 3818 struct cpu_dyn_regs *dyn_regs = 3819 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3820 struct asic_fixed_properties *prop = &hdev->asic_prop; 3821 struct gaudi_device *gaudi = hdev->asic_specific; 3822 u32 status, irq_handler_offset; 3823 struct hl_eq *eq; 3824 struct hl_hw_queue *cpu_pq = 3825 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3826 int err; 3827 3828 if (!hdev->cpu_queues_enable) 3829 return 0; 3830 3831 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3832 return 0; 3833 3834 eq = &hdev->event_queue; 3835 3836 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3837 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3838 3839 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3840 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3841 3842 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3843 lower_32_bits(hdev->cpu_accessible_dma_address)); 3844 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3845 upper_32_bits(hdev->cpu_accessible_dma_address)); 3846 3847 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3848 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3849 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3850 3851 /* Used for EQ CI */ 3852 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3853 3854 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3855 3856 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3857 3858 irq_handler_offset = prop->gic_interrupts_enable ? 3859 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3860 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3861 3862 WREG32(irq_handler_offset, 3863 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3864 3865 err = hl_poll_timeout( 3866 hdev, 3867 mmCPU_IF_QUEUE_INIT, 3868 status, 3869 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3870 1000, 3871 cpu_timeout); 3872 3873 if (err) { 3874 dev_err(hdev->dev, 3875 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3876 return -EIO; 3877 } 3878 3879 /* update FW application security bits */ 3880 if (prop->fw_cpu_boot_dev_sts0_valid) 3881 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3882 if (prop->fw_cpu_boot_dev_sts1_valid) 3883 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3884 3885 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3886 return 0; 3887 } 3888 3889 static void gaudi_pre_hw_init(struct hl_device *hdev) 3890 { 3891 /* Perform read from the device to make sure device is up */ 3892 RREG32(mmHW_STATE); 3893 3894 if (!hdev->asic_prop.fw_security_enabled) { 3895 /* Set the access through PCI bars (Linux driver only) as 3896 * secured 3897 */ 3898 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3899 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3900 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3901 3902 /* Perform read to flush the waiting writes to ensure 3903 * configuration was set in the device 3904 */ 3905 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3906 } 3907 3908 /* 3909 * Let's mark in the H/W that we have reached this point. We check 3910 * this value in the reset_before_init function to understand whether 3911 * we need to reset the chip before doing H/W init. 
This register is 3912 * cleared by the H/W upon H/W reset 3913 */ 3914 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3915 } 3916 3917 static int gaudi_hw_init(struct hl_device *hdev) 3918 { 3919 struct gaudi_device *gaudi = hdev->asic_specific; 3920 int rc; 3921 3922 gaudi_pre_hw_init(hdev); 3923 3924 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3925 * So we set it here and if anyone tries to move it later to 3926 * a different address, there will be an error 3927 */ 3928 if (hdev->asic_prop.iatu_done_by_fw) 3929 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3930 3931 /* 3932 * Before pushing u-boot/linux to device, need to set the hbm bar to 3933 * base address of dram 3934 */ 3935 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 3936 dev_err(hdev->dev, 3937 "failed to map HBM bar to DRAM base address\n"); 3938 return -EIO; 3939 } 3940 3941 rc = gaudi_init_cpu(hdev); 3942 if (rc) { 3943 dev_err(hdev->dev, "failed to initialize CPU\n"); 3944 return rc; 3945 } 3946 3947 /* In case the clock gating was enabled in preboot we need to disable 3948 * it here before touching the MME/TPC registers. 3949 */ 3950 gaudi_disable_clock_gating(hdev); 3951 3952 /* SRAM scrambler must be initialized after CPU is running from HBM */ 3953 gaudi_init_scrambler_sram(hdev); 3954 3955 /* This is here just in case we are working without CPU */ 3956 gaudi_init_scrambler_hbm(hdev); 3957 3958 gaudi_init_golden_registers(hdev); 3959 3960 rc = gaudi_mmu_init(hdev); 3961 if (rc) 3962 return rc; 3963 3964 gaudi_init_security(hdev); 3965 3966 gaudi_init_pci_dma_qmans(hdev); 3967 3968 gaudi_init_hbm_dma_qmans(hdev); 3969 3970 gaudi_init_mme_qmans(hdev); 3971 3972 gaudi_init_tpc_qmans(hdev); 3973 3974 gaudi_init_nic_qmans(hdev); 3975 3976 gaudi_enable_timestamp(hdev); 3977 3978 /* MSI must be enabled before CPU queues and NIC are initialized */ 3979 rc = gaudi_enable_msi(hdev); 3980 if (rc) 3981 goto disable_queues; 3982 3983 /* must be called after MSI was enabled */ 3984 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 3985 if (rc) { 3986 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 3987 rc); 3988 goto disable_msi; 3989 } 3990 3991 /* Perform read from the device to flush all configuration */ 3992 RREG32(mmHW_STATE); 3993 3994 return 0; 3995 3996 disable_msi: 3997 gaudi_disable_msi(hdev); 3998 disable_queues: 3999 gaudi_disable_mme_qmans(hdev); 4000 gaudi_disable_pci_dma_qmans(hdev); 4001 4002 return rc; 4003 } 4004 4005 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4006 { 4007 struct cpu_dyn_regs *dyn_regs = 4008 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4009 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4010 struct gaudi_device *gaudi = hdev->asic_specific; 4011 bool driver_performs_reset; 4012 4013 if (!hard_reset) { 4014 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4015 return 0; 4016 } 4017 4018 if (hdev->pldm) { 4019 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4020 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4021 } else { 4022 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4023 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4024 } 4025 4026 if (fw_reset) { 4027 dev_dbg(hdev->dev, 4028 "Firmware performs HARD reset, going to wait %dms\n", 4029 reset_timeout_ms); 4030 4031 goto skip_reset; 4032 } 4033 4034 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4035 !hdev->asic_prop.hard_reset_done_by_fw); 4036 4037 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4038 * halt mode 4039 */ 4040 if (driver_performs_reset) 4041 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4042 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4043 4044 /* If linux is loaded in the device CPU we need to communicate with it 4045 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4046 * registers in case of old F/Ws 4047 */ 4048 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4049 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4050 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4051 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4052 4053 WREG32(irq_handler_offset, 4054 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4055 4056 /* This is a hail-mary attempt to revive the card in the small chance that the 4057 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4058 * In that case, triggering reset through GIC won't help. We need to trigger the 4059 * reset as if Linux wasn't loaded. 4060 * 4061 * We do it only if the reset cause was HB, because that would be the indication 4062 * of such an event. 4063 * 4064 * In case watchdog hasn't expired but we still got HB, then this won't do any 4065 * damage. 4066 */ 4067 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4068 if (hdev->asic_prop.hard_reset_done_by_fw) 4069 hl_fw_ask_hard_reset_without_linux(hdev); 4070 else 4071 hl_fw_ask_halt_machine_without_linux(hdev); 4072 } 4073 } else { 4074 if (hdev->asic_prop.hard_reset_done_by_fw) 4075 hl_fw_ask_hard_reset_without_linux(hdev); 4076 else 4077 hl_fw_ask_halt_machine_without_linux(hdev); 4078 } 4079 4080 if (driver_performs_reset) { 4081 4082 /* Configure the reset registers. Must be done as early as 4083 * possible in case we fail during H/W initialization 4084 */ 4085 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4086 (CFG_RST_H_DMA_MASK | 4087 CFG_RST_H_MME_MASK | 4088 CFG_RST_H_SM_MASK | 4089 CFG_RST_H_TPC_7_MASK)); 4090 4091 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4092 4093 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4094 (CFG_RST_H_HBM_MASK | 4095 CFG_RST_H_TPC_7_MASK | 4096 CFG_RST_H_NIC_MASK | 4097 CFG_RST_H_SM_MASK | 4098 CFG_RST_H_DMA_MASK | 4099 CFG_RST_H_MME_MASK | 4100 CFG_RST_H_CPU_MASK | 4101 CFG_RST_H_MMU_MASK)); 4102 4103 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4104 (CFG_RST_L_IF_MASK | 4105 CFG_RST_L_PSOC_MASK | 4106 CFG_RST_L_TPC_MASK)); 4107 4108 msleep(cpu_timeout_ms); 4109 4110 /* Tell ASIC not to re-initialize PCIe */ 4111 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4112 4113 /* Restart BTL/BLR upon hard-reset */ 4114 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4115 4116 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4117 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4118 4119 dev_dbg(hdev->dev, 4120 "Issued HARD reset command, going to wait %dms\n", 4121 reset_timeout_ms); 4122 } else { 4123 dev_dbg(hdev->dev, 4124 "Firmware performs HARD reset, going to wait %dms\n", 4125 reset_timeout_ms); 4126 } 4127 4128 skip_reset: 4129 /* 4130 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4131 * itself is in reset. 
Need to wait until the reset is deasserted 4132 */ 4133 msleep(reset_timeout_ms); 4134 4135 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4136 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { 4137 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); 4138 return -ETIMEDOUT; 4139 } 4140 4141 if (gaudi) { 4142 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4143 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4144 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4145 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4146 HW_CAP_HBM_SCRAMBLER); 4147 4148 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4149 4150 hdev->device_cpu_is_halted = false; 4151 } 4152 return 0; 4153 } 4154 4155 static int gaudi_suspend(struct hl_device *hdev) 4156 { 4157 int rc; 4158 4159 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4160 if (rc) 4161 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4162 4163 return rc; 4164 } 4165 4166 static int gaudi_resume(struct hl_device *hdev) 4167 { 4168 return gaudi_init_iatu(hdev); 4169 } 4170 4171 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4172 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4173 { 4174 int rc; 4175 4176 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4177 VM_DONTCOPY | VM_NORESERVE); 4178 4179 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4180 (dma_addr - HOST_PHYS_BASE), size); 4181 if (rc) 4182 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4183 4184 return rc; 4185 } 4186 4187 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4188 { 4189 struct cpu_dyn_regs *dyn_regs = 4190 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4191 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4192 struct gaudi_device *gaudi = hdev->asic_specific; 4193 bool invalid_queue = false; 4194 int dma_id; 4195 4196 switch (hw_queue_id) { 4197 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4198 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4199 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4200 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4201 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4202 break; 4203 4204 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4205 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4206 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4207 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4208 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4209 break; 4210 4211 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4212 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4213 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4214 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4215 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4216 break; 4217 4218 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4219 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4220 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4221 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4222 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4223 break; 4224 4225 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4226 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4227 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4228 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4229 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4230 break; 4231 4232 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4233 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4234 dma_qm_offset = dma_id * 
DMA_QMAN_OFFSET; 4235 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4236 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4237 break; 4238 4239 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4240 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4241 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4242 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4243 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4244 break; 4245 4246 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4247 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4248 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4249 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4250 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4251 break; 4252 4253 case GAUDI_QUEUE_ID_CPU_PQ: 4254 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4255 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4256 else 4257 invalid_queue = true; 4258 break; 4259 4260 case GAUDI_QUEUE_ID_MME_0_0: 4261 db_reg_offset = mmMME2_QM_PQ_PI_0; 4262 break; 4263 4264 case GAUDI_QUEUE_ID_MME_0_1: 4265 db_reg_offset = mmMME2_QM_PQ_PI_1; 4266 break; 4267 4268 case GAUDI_QUEUE_ID_MME_0_2: 4269 db_reg_offset = mmMME2_QM_PQ_PI_2; 4270 break; 4271 4272 case GAUDI_QUEUE_ID_MME_0_3: 4273 db_reg_offset = mmMME2_QM_PQ_PI_3; 4274 break; 4275 4276 case GAUDI_QUEUE_ID_MME_1_0: 4277 db_reg_offset = mmMME0_QM_PQ_PI_0; 4278 break; 4279 4280 case GAUDI_QUEUE_ID_MME_1_1: 4281 db_reg_offset = mmMME0_QM_PQ_PI_1; 4282 break; 4283 4284 case GAUDI_QUEUE_ID_MME_1_2: 4285 db_reg_offset = mmMME0_QM_PQ_PI_2; 4286 break; 4287 4288 case GAUDI_QUEUE_ID_MME_1_3: 4289 db_reg_offset = mmMME0_QM_PQ_PI_3; 4290 break; 4291 4292 case GAUDI_QUEUE_ID_TPC_0_0: 4293 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4294 break; 4295 4296 case GAUDI_QUEUE_ID_TPC_0_1: 4297 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4298 break; 4299 4300 case GAUDI_QUEUE_ID_TPC_0_2: 4301 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4302 break; 4303 4304 case GAUDI_QUEUE_ID_TPC_0_3: 4305 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4306 break; 4307 4308 case GAUDI_QUEUE_ID_TPC_1_0: 4309 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4310 break; 4311 4312 case GAUDI_QUEUE_ID_TPC_1_1: 4313 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4314 break; 4315 4316 case GAUDI_QUEUE_ID_TPC_1_2: 4317 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4318 break; 4319 4320 case GAUDI_QUEUE_ID_TPC_1_3: 4321 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4322 break; 4323 4324 case GAUDI_QUEUE_ID_TPC_2_0: 4325 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4326 break; 4327 4328 case GAUDI_QUEUE_ID_TPC_2_1: 4329 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4330 break; 4331 4332 case GAUDI_QUEUE_ID_TPC_2_2: 4333 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4334 break; 4335 4336 case GAUDI_QUEUE_ID_TPC_2_3: 4337 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4338 break; 4339 4340 case GAUDI_QUEUE_ID_TPC_3_0: 4341 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4342 break; 4343 4344 case GAUDI_QUEUE_ID_TPC_3_1: 4345 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4346 break; 4347 4348 case GAUDI_QUEUE_ID_TPC_3_2: 4349 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4350 break; 4351 4352 case GAUDI_QUEUE_ID_TPC_3_3: 4353 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4354 break; 4355 4356 case GAUDI_QUEUE_ID_TPC_4_0: 4357 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4358 break; 4359 4360 case GAUDI_QUEUE_ID_TPC_4_1: 4361 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4362 break; 4363 4364 case GAUDI_QUEUE_ID_TPC_4_2: 4365 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4366 break; 4367 4368 case GAUDI_QUEUE_ID_TPC_4_3: 4369 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4370 break; 4371 4372 case GAUDI_QUEUE_ID_TPC_5_0: 4373 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4374 break; 4375 4376 case 
GAUDI_QUEUE_ID_TPC_5_1: 4377 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4378 break; 4379 4380 case GAUDI_QUEUE_ID_TPC_5_2: 4381 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4382 break; 4383 4384 case GAUDI_QUEUE_ID_TPC_5_3: 4385 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4386 break; 4387 4388 case GAUDI_QUEUE_ID_TPC_6_0: 4389 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4390 break; 4391 4392 case GAUDI_QUEUE_ID_TPC_6_1: 4393 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4394 break; 4395 4396 case GAUDI_QUEUE_ID_TPC_6_2: 4397 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4398 break; 4399 4400 case GAUDI_QUEUE_ID_TPC_6_3: 4401 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4402 break; 4403 4404 case GAUDI_QUEUE_ID_TPC_7_0: 4405 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4406 break; 4407 4408 case GAUDI_QUEUE_ID_TPC_7_1: 4409 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4410 break; 4411 4412 case GAUDI_QUEUE_ID_TPC_7_2: 4413 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4414 break; 4415 4416 case GAUDI_QUEUE_ID_TPC_7_3: 4417 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4418 break; 4419 4420 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4421 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4422 invalid_queue = true; 4423 4424 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4425 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4426 break; 4427 4428 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4429 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4430 invalid_queue = true; 4431 4432 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4433 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4434 break; 4435 4436 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4437 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4438 invalid_queue = true; 4439 4440 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4441 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4442 break; 4443 4444 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4445 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4446 invalid_queue = true; 4447 4448 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4449 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4450 break; 4451 4452 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4453 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4454 invalid_queue = true; 4455 4456 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4457 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4458 break; 4459 4460 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4461 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4462 invalid_queue = true; 4463 4464 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4465 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4466 break; 4467 4468 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4469 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4470 invalid_queue = true; 4471 4472 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4473 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4474 break; 4475 4476 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4477 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4478 invalid_queue = true; 4479 4480 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4481 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4482 break; 4483 4484 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4485 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4486 invalid_queue = true; 4487 4488 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4489 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4490 break; 4491 4492 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4493 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4494 invalid_queue = true; 4495 4496 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4497 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4498 break; 4499 4500 default: 4501 
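		/* Unrecognized H/W queue ID - flag it and fail below */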
invalid_queue = true; 4502 } 4503 4504 if (invalid_queue) { 4505 /* Should never get here */ 4506 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4507 hw_queue_id); 4508 return; 4509 } 4510 4511 db_value = pi; 4512 4513 /* ring the doorbell */ 4514 WREG32(db_reg_offset, db_value); 4515 4516 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4517 /* make sure device CPU will read latest data from host */ 4518 mb(); 4519 4520 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4521 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4522 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4523 4524 WREG32(irq_handler_offset, 4525 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4526 } 4527 } 4528 4529 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4530 struct hl_bd *bd) 4531 { 4532 __le64 *pbd = (__le64 *) bd; 4533 4534 /* The QMANs are on the host memory so a simple copy suffice */ 4535 pqe[0] = pbd[0]; 4536 pqe[1] = pbd[1]; 4537 } 4538 4539 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4540 dma_addr_t *dma_handle, gfp_t flags) 4541 { 4542 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4543 dma_handle, flags); 4544 4545 /* Shift to the device's base physical address of host memory */ 4546 if (kernel_addr) 4547 *dma_handle += HOST_PHYS_BASE; 4548 4549 return kernel_addr; 4550 } 4551 4552 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4553 void *cpu_addr, dma_addr_t dma_handle) 4554 { 4555 /* Cancel the device's base physical address of host memory */ 4556 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4557 4558 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4559 } 4560 4561 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4562 { 4563 struct asic_fixed_properties *prop = &hdev->asic_prop; 4564 u64 cur_addr = prop->dram_user_base_address; 4565 u32 chunk_size, busy; 4566 int rc, dma_id; 4567 4568 while (cur_addr < prop->dram_end_address) { 4569 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4570 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4571 4572 chunk_size = 4573 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4574 4575 dev_dbg(hdev->dev, 4576 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4577 cur_addr, cur_addr + chunk_size); 4578 4579 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4580 lower_32_bits(val)); 4581 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4582 upper_32_bits(val)); 4583 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4584 lower_32_bits(cur_addr)); 4585 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4586 upper_32_bits(cur_addr)); 4587 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4588 chunk_size); 4589 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4590 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4591 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4592 4593 cur_addr += chunk_size; 4594 4595 if (cur_addr == prop->dram_end_address) 4596 break; 4597 } 4598 4599 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4600 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4601 4602 rc = hl_poll_timeout( 4603 hdev, 4604 mmDMA0_CORE_STS0 + dma_offset, 4605 busy, 4606 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4607 1000, 4608 HBM_SCRUBBING_TIMEOUT_US); 4609 4610 if (rc) { 4611 dev_err(hdev->dev, 4612 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4613 dma_id); 4614 return -EIO; 4615 } 4616 } 4617 } 4618 4619 return 0; 4620 } 4621 4622 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4623 { 4624 struct asic_fixed_properties *prop = 
&hdev->asic_prop; 4625 u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US; 4626 u64 addr, size, val = hdev->memory_scrub_val; 4627 ktime_t timeout; 4628 int rc = 0; 4629 4630 if (!hdev->memory_scrub) 4631 return 0; 4632 4633 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4634 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4635 if (ktime_compare(ktime_get(), timeout) > 0) { 4636 dev_err(hdev->dev, "waiting for idle timeout\n"); 4637 return -ETIMEDOUT; 4638 } 4639 usleep_range((1000 >> 2) + 1, 1000); 4640 } 4641 4642 /* Scrub SRAM */ 4643 addr = prop->sram_user_base_address; 4644 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4645 4646 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4647 addr, addr + size, val); 4648 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4649 if (rc) { 4650 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4651 return rc; 4652 } 4653 4654 /* Scrub HBM using all DMA channels in parallel */ 4655 rc = gaudi_scrub_device_dram(hdev, val); 4656 if (rc) { 4657 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4658 return rc; 4659 } 4660 4661 return 0; 4662 } 4663 4664 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4665 u32 queue_id, dma_addr_t *dma_handle, 4666 u16 *queue_len) 4667 { 4668 struct gaudi_device *gaudi = hdev->asic_specific; 4669 struct gaudi_internal_qman_info *q; 4670 4671 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4672 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4673 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4674 return NULL; 4675 } 4676 4677 q = &gaudi->internal_qmans[queue_id]; 4678 *dma_handle = q->pq_dma_addr; 4679 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4680 4681 return q->pq_kernel_addr; 4682 } 4683 4684 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4685 u16 len, u32 timeout, u64 *result) 4686 { 4687 struct gaudi_device *gaudi = hdev->asic_specific; 4688 4689 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4690 if (result) 4691 *result = 0; 4692 return 0; 4693 } 4694 4695 if (!timeout) 4696 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4697 4698 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4699 timeout, result); 4700 } 4701 4702 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4703 { 4704 struct packet_msg_prot *fence_pkt; 4705 dma_addr_t pkt_dma_addr; 4706 u32 fence_val, tmp, timeout_usec; 4707 dma_addr_t fence_dma_addr; 4708 u32 *fence_ptr; 4709 int rc; 4710 4711 if (hdev->pldm) 4712 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4713 else 4714 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4715 4716 fence_val = GAUDI_QMAN0_FENCE_VAL; 4717 4718 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4719 if (!fence_ptr) { 4720 dev_err(hdev->dev, 4721 "Failed to allocate memory for H/W queue %d testing\n", 4722 hw_queue_id); 4723 return -ENOMEM; 4724 } 4725 4726 *fence_ptr = 0; 4727 4728 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4729 &pkt_dma_addr); 4730 if (!fence_pkt) { 4731 dev_err(hdev->dev, 4732 "Failed to allocate packet for H/W queue %d testing\n", 4733 hw_queue_id); 4734 rc = -ENOMEM; 4735 goto free_fence_ptr; 4736 } 4737 4738 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4739 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4740 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4741 4742 fence_pkt->ctl = cpu_to_le32(tmp); 4743 fence_pkt->value = cpu_to_le32(fence_val); 4744 
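	/*
	 * When the QMAN executes this MSG_PROT packet it writes fence_val to
	 * fence_dma_addr; polling that address below is what tells us the
	 * queue actually processed the packet.
	 */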
fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4745 4746 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4747 sizeof(struct packet_msg_prot), 4748 pkt_dma_addr); 4749 if (rc) { 4750 dev_err(hdev->dev, 4751 "Failed to send fence packet to H/W queue %d\n", 4752 hw_queue_id); 4753 goto free_pkt; 4754 } 4755 4756 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4757 1000, timeout_usec, true); 4758 4759 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4760 4761 if (rc == -ETIMEDOUT) { 4762 dev_err(hdev->dev, 4763 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4764 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4765 rc = -EIO; 4766 } 4767 4768 free_pkt: 4769 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4770 free_fence_ptr: 4771 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4772 return rc; 4773 } 4774 4775 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4776 { 4777 struct gaudi_device *gaudi = hdev->asic_specific; 4778 4779 /* 4780 * check capability here as send_cpu_message() won't update the result 4781 * value if no capability 4782 */ 4783 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4784 return 0; 4785 4786 return hl_fw_test_cpu_queue(hdev); 4787 } 4788 4789 static int gaudi_test_queues(struct hl_device *hdev) 4790 { 4791 int i, rc, ret_val = 0; 4792 4793 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4794 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4795 rc = gaudi_test_queue(hdev, i); 4796 if (rc) 4797 ret_val = -EINVAL; 4798 } 4799 } 4800 4801 rc = gaudi_test_cpu_queue(hdev); 4802 if (rc) 4803 ret_val = -EINVAL; 4804 4805 return ret_val; 4806 } 4807 4808 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4809 gfp_t mem_flags, dma_addr_t *dma_handle) 4810 { 4811 void *kernel_addr; 4812 4813 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4814 return NULL; 4815 4816 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4817 4818 /* Shift to the device's base physical address of host memory */ 4819 if (kernel_addr) 4820 *dma_handle += HOST_PHYS_BASE; 4821 4822 return kernel_addr; 4823 } 4824 4825 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4826 dma_addr_t dma_addr) 4827 { 4828 /* Cancel the device's base physical address of host memory */ 4829 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4830 4831 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4832 } 4833 4834 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4835 size_t size, dma_addr_t *dma_handle) 4836 { 4837 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4838 } 4839 4840 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4841 size_t size, void *vaddr) 4842 { 4843 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4844 } 4845 4846 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4847 { 4848 struct scatterlist *sg, *sg_next_iter; 4849 u32 count, dma_desc_cnt; 4850 u64 len, len_next; 4851 dma_addr_t addr, addr_next; 4852 4853 dma_desc_cnt = 0; 4854 4855 for_each_sgtable_dma_sg(sgt, sg, count) { 4856 len = sg_dma_len(sg); 4857 addr = sg_dma_address(sg); 4858 4859 if (len == 0) 4860 break; 4861 4862 while ((count + 1) < sgt->nents) { 4863 sg_next_iter = sg_next(sg); 4864 len_next = sg_dma_len(sg_next_iter); 4865 addr_next = sg_dma_address(sg_next_iter); 4866 4867 if (len_next == 0) 4868 break; 4869 4870 if ((addr + len == addr_next) && 4871 (len + len_next 
<= DMA_MAX_TRANSFER_SIZE)) { 4872 len += len_next; 4873 count++; 4874 sg = sg_next_iter; 4875 } else { 4876 break; 4877 } 4878 } 4879 4880 dma_desc_cnt++; 4881 } 4882 4883 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4884 } 4885 4886 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4887 struct hl_cs_parser *parser, 4888 struct packet_lin_dma *user_dma_pkt, 4889 u64 addr, enum dma_data_direction dir) 4890 { 4891 struct hl_userptr *userptr; 4892 int rc; 4893 4894 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4895 parser->job_userptr_list, &userptr)) 4896 goto already_pinned; 4897 4898 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4899 if (!userptr) 4900 return -ENOMEM; 4901 4902 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4903 userptr); 4904 if (rc) 4905 goto free_userptr; 4906 4907 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4908 4909 rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); 4910 if (rc) { 4911 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4912 goto unpin_memory; 4913 } 4914 4915 userptr->dma_mapped = true; 4916 userptr->dir = dir; 4917 4918 already_pinned: 4919 parser->patched_cb_size += 4920 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4921 4922 return 0; 4923 4924 unpin_memory: 4925 list_del(&userptr->job_node); 4926 hl_unpin_host_memory(hdev, userptr); 4927 free_userptr: 4928 kfree(userptr); 4929 return rc; 4930 } 4931 4932 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4933 struct hl_cs_parser *parser, 4934 struct packet_lin_dma *user_dma_pkt, 4935 bool src_in_host) 4936 { 4937 enum dma_data_direction dir; 4938 bool skip_host_mem_pin = false, user_memset; 4939 u64 addr; 4940 int rc = 0; 4941 4942 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 4943 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 4944 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 4945 4946 if (src_in_host) { 4947 if (user_memset) 4948 skip_host_mem_pin = true; 4949 4950 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 4951 dir = DMA_TO_DEVICE; 4952 addr = le64_to_cpu(user_dma_pkt->src_addr); 4953 } else { 4954 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 4955 dir = DMA_FROM_DEVICE; 4956 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4957 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4958 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4959 } 4960 4961 if (skip_host_mem_pin) 4962 parser->patched_cb_size += sizeof(*user_dma_pkt); 4963 else 4964 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 4965 addr, dir); 4966 4967 return rc; 4968 } 4969 4970 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 4971 struct hl_cs_parser *parser, 4972 struct packet_lin_dma *user_dma_pkt) 4973 { 4974 bool src_in_host = false; 4975 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4976 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4977 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4978 4979 dev_dbg(hdev->dev, "DMA packet details:\n"); 4980 dev_dbg(hdev->dev, "source == 0x%llx\n", 4981 le64_to_cpu(user_dma_pkt->src_addr)); 4982 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 4983 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 4984 4985 /* 4986 * Special handling for DMA with size 0. 
Bypass all validations 4987 * because no transactions will be done except for WR_COMP, which 4988 * is not a security issue 4989 */ 4990 if (!le32_to_cpu(user_dma_pkt->tsize)) { 4991 parser->patched_cb_size += sizeof(*user_dma_pkt); 4992 return 0; 4993 } 4994 4995 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 4996 src_in_host = true; 4997 4998 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 4999 src_in_host); 5000 } 5001 5002 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5003 struct hl_cs_parser *parser, 5004 struct packet_load_and_exe *user_pkt) 5005 { 5006 u32 cfg; 5007 5008 cfg = le32_to_cpu(user_pkt->cfg); 5009 5010 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5011 dev_err(hdev->dev, 5012 "User not allowed to use Load and Execute\n"); 5013 return -EPERM; 5014 } 5015 5016 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5017 5018 return 0; 5019 } 5020 5021 static int gaudi_validate_cb(struct hl_device *hdev, 5022 struct hl_cs_parser *parser, bool is_mmu) 5023 { 5024 u32 cb_parsed_length = 0; 5025 int rc = 0; 5026 5027 parser->patched_cb_size = 0; 5028 5029 /* cb_user_size is more than 0 so loop will always be executed */ 5030 while (cb_parsed_length < parser->user_cb_size) { 5031 enum packet_id pkt_id; 5032 u16 pkt_size; 5033 struct gaudi_packet *user_pkt; 5034 5035 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5036 5037 pkt_id = (enum packet_id) ( 5038 (le64_to_cpu(user_pkt->header) & 5039 PACKET_HEADER_PACKET_ID_MASK) >> 5040 PACKET_HEADER_PACKET_ID_SHIFT); 5041 5042 if (!validate_packet_id(pkt_id)) { 5043 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5044 rc = -EINVAL; 5045 break; 5046 } 5047 5048 pkt_size = gaudi_packet_sizes[pkt_id]; 5049 cb_parsed_length += pkt_size; 5050 if (cb_parsed_length > parser->user_cb_size) { 5051 dev_err(hdev->dev, 5052 "packet 0x%x is out of CB boundary\n", pkt_id); 5053 rc = -EINVAL; 5054 break; 5055 } 5056 5057 switch (pkt_id) { 5058 case PACKET_MSG_PROT: 5059 dev_err(hdev->dev, 5060 "User not allowed to use MSG_PROT\n"); 5061 rc = -EPERM; 5062 break; 5063 5064 case PACKET_CP_DMA: 5065 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5066 rc = -EPERM; 5067 break; 5068 5069 case PACKET_STOP: 5070 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5071 rc = -EPERM; 5072 break; 5073 5074 case PACKET_WREG_BULK: 5075 dev_err(hdev->dev, 5076 "User not allowed to use WREG_BULK\n"); 5077 rc = -EPERM; 5078 break; 5079 5080 case PACKET_LOAD_AND_EXE: 5081 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5082 (struct packet_load_and_exe *) user_pkt); 5083 break; 5084 5085 case PACKET_LIN_DMA: 5086 parser->contains_dma_pkt = true; 5087 if (is_mmu) 5088 parser->patched_cb_size += pkt_size; 5089 else 5090 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5091 (struct packet_lin_dma *) user_pkt); 5092 break; 5093 5094 case PACKET_WREG_32: 5095 case PACKET_MSG_LONG: 5096 case PACKET_MSG_SHORT: 5097 case PACKET_REPEAT: 5098 case PACKET_FENCE: 5099 case PACKET_NOP: 5100 case PACKET_ARB_POINT: 5101 parser->patched_cb_size += pkt_size; 5102 break; 5103 5104 default: 5105 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5106 pkt_id); 5107 rc = -EINVAL; 5108 break; 5109 } 5110 5111 if (rc) 5112 break; 5113 } 5114 5115 /* 5116 * The new CB should have space at the end for two MSG_PROT packets: 5117 * 1. Optional NOP padding for cacheline alignment 5118 * 2. A packet that will act as a completion packet 5119 * 3. 
A packet that will generate MSI interrupt 5120 */ 5121 if (parser->completion) 5122 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5123 parser->patched_cb_size); 5124 5125 return rc; 5126 } 5127 5128 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5129 struct hl_cs_parser *parser, 5130 struct packet_lin_dma *user_dma_pkt, 5131 struct packet_lin_dma *new_dma_pkt, 5132 u32 *new_dma_pkt_size) 5133 { 5134 struct hl_userptr *userptr; 5135 struct scatterlist *sg, *sg_next_iter; 5136 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5137 u64 len, len_next; 5138 dma_addr_t dma_addr, dma_addr_next; 5139 u64 device_memory_addr, addr; 5140 enum dma_data_direction dir; 5141 struct sg_table *sgt; 5142 bool src_in_host = false; 5143 bool skip_host_mem_pin = false; 5144 bool user_memset; 5145 5146 ctl = le32_to_cpu(user_dma_pkt->ctl); 5147 5148 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5149 src_in_host = true; 5150 5151 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5152 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5153 5154 if (src_in_host) { 5155 addr = le64_to_cpu(user_dma_pkt->src_addr); 5156 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5157 dir = DMA_TO_DEVICE; 5158 if (user_memset) 5159 skip_host_mem_pin = true; 5160 } else { 5161 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5162 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5163 dir = DMA_FROM_DEVICE; 5164 } 5165 5166 if ((!skip_host_mem_pin) && 5167 (!hl_userptr_is_pinned(hdev, addr, 5168 le32_to_cpu(user_dma_pkt->tsize), 5169 parser->job_userptr_list, &userptr))) { 5170 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5171 addr, user_dma_pkt->tsize); 5172 return -EFAULT; 5173 } 5174 5175 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5176 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5177 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5178 return 0; 5179 } 5180 5181 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5182 5183 sgt = userptr->sgt; 5184 dma_desc_cnt = 0; 5185 5186 for_each_sgtable_dma_sg(sgt, sg, count) { 5187 len = sg_dma_len(sg); 5188 dma_addr = sg_dma_address(sg); 5189 5190 if (len == 0) 5191 break; 5192 5193 while ((count + 1) < sgt->nents) { 5194 sg_next_iter = sg_next(sg); 5195 len_next = sg_dma_len(sg_next_iter); 5196 dma_addr_next = sg_dma_address(sg_next_iter); 5197 5198 if (len_next == 0) 5199 break; 5200 5201 if ((dma_addr + len == dma_addr_next) && 5202 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5203 len += len_next; 5204 count++; 5205 sg = sg_next_iter; 5206 } else { 5207 break; 5208 } 5209 } 5210 5211 ctl = le32_to_cpu(user_dma_pkt->ctl); 5212 if (likely(dma_desc_cnt)) 5213 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5214 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5215 new_dma_pkt->ctl = cpu_to_le32(ctl); 5216 new_dma_pkt->tsize = cpu_to_le32(len); 5217 5218 if (dir == DMA_TO_DEVICE) { 5219 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5220 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5221 } else { 5222 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5223 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5224 } 5225 5226 if (!user_memset) 5227 device_memory_addr += len; 5228 dma_desc_cnt++; 5229 new_dma_pkt++; 5230 } 5231 5232 if (!dma_desc_cnt) { 5233 dev_err(hdev->dev, 5234 "Error of 0 SG entries when patching DMA packet\n"); 5235 return -EFAULT; 5236 } 5237 5238 /* Fix the last dma packet - wrcomp must be as user set it */ 5239 new_dma_pkt--; 5240 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5241 5242 
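	/*
	 * Report how many bytes the expanded packet list occupies so that
	 * gaudi_patch_cb() can advance its write offset into the patched CB
	 * by the same amount.
	 */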
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5243 5244 return 0; 5245 } 5246 5247 static int gaudi_patch_cb(struct hl_device *hdev, 5248 struct hl_cs_parser *parser) 5249 { 5250 u32 cb_parsed_length = 0; 5251 u32 cb_patched_cur_length = 0; 5252 int rc = 0; 5253 5254 /* cb_user_size is more than 0 so loop will always be executed */ 5255 while (cb_parsed_length < parser->user_cb_size) { 5256 enum packet_id pkt_id; 5257 u16 pkt_size; 5258 u32 new_pkt_size = 0; 5259 struct gaudi_packet *user_pkt, *kernel_pkt; 5260 5261 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5262 kernel_pkt = parser->patched_cb->kernel_address + 5263 cb_patched_cur_length; 5264 5265 pkt_id = (enum packet_id) ( 5266 (le64_to_cpu(user_pkt->header) & 5267 PACKET_HEADER_PACKET_ID_MASK) >> 5268 PACKET_HEADER_PACKET_ID_SHIFT); 5269 5270 if (!validate_packet_id(pkt_id)) { 5271 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5272 rc = -EINVAL; 5273 break; 5274 } 5275 5276 pkt_size = gaudi_packet_sizes[pkt_id]; 5277 cb_parsed_length += pkt_size; 5278 if (cb_parsed_length > parser->user_cb_size) { 5279 dev_err(hdev->dev, 5280 "packet 0x%x is out of CB boundary\n", pkt_id); 5281 rc = -EINVAL; 5282 break; 5283 } 5284 5285 switch (pkt_id) { 5286 case PACKET_LIN_DMA: 5287 rc = gaudi_patch_dma_packet(hdev, parser, 5288 (struct packet_lin_dma *) user_pkt, 5289 (struct packet_lin_dma *) kernel_pkt, 5290 &new_pkt_size); 5291 cb_patched_cur_length += new_pkt_size; 5292 break; 5293 5294 case PACKET_MSG_PROT: 5295 dev_err(hdev->dev, 5296 "User not allowed to use MSG_PROT\n"); 5297 rc = -EPERM; 5298 break; 5299 5300 case PACKET_CP_DMA: 5301 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5302 rc = -EPERM; 5303 break; 5304 5305 case PACKET_STOP: 5306 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5307 rc = -EPERM; 5308 break; 5309 5310 case PACKET_WREG_32: 5311 case PACKET_WREG_BULK: 5312 case PACKET_MSG_LONG: 5313 case PACKET_MSG_SHORT: 5314 case PACKET_REPEAT: 5315 case PACKET_FENCE: 5316 case PACKET_NOP: 5317 case PACKET_ARB_POINT: 5318 case PACKET_LOAD_AND_EXE: 5319 memcpy(kernel_pkt, user_pkt, pkt_size); 5320 cb_patched_cur_length += pkt_size; 5321 break; 5322 5323 default: 5324 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5325 pkt_id); 5326 rc = -EINVAL; 5327 break; 5328 } 5329 5330 if (rc) 5331 break; 5332 } 5333 5334 return rc; 5335 } 5336 5337 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5338 struct hl_cs_parser *parser) 5339 { 5340 u64 handle; 5341 u32 patched_cb_size; 5342 struct hl_cb *user_cb; 5343 int rc; 5344 5345 /* 5346 * The new CB should have space at the end for two MSG_PROT packets: 5347 * 1. Optional NOP padding for cacheline alignment 5348 * 2. A packet that will act as a completion packet 5349 * 3. 
A packet that will generate MSI interrupt 5350 */ 5351 if (parser->completion) 5352 parser->patched_cb_size = parser->user_cb_size + 5353 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5354 else 5355 parser->patched_cb_size = parser->user_cb_size; 5356 5357 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5358 parser->patched_cb_size, false, false, 5359 &handle); 5360 5361 if (rc) { 5362 dev_err(hdev->dev, 5363 "Failed to allocate patched CB for DMA CS %d\n", 5364 rc); 5365 return rc; 5366 } 5367 5368 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5369 /* hl_cb_get should never fail */ 5370 if (!parser->patched_cb) { 5371 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5372 rc = -EFAULT; 5373 goto out; 5374 } 5375 5376 /* 5377 * We are protected from overflow because the check 5378 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5379 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5380 * 5381 * There is no option to reach here without going through that check because: 5382 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5383 * an external queue. 5384 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5385 */ 5386 memcpy(parser->patched_cb->kernel_address, 5387 parser->user_cb->kernel_address, 5388 parser->user_cb_size); 5389 5390 patched_cb_size = parser->patched_cb_size; 5391 5392 /* Validate patched CB instead of user CB */ 5393 user_cb = parser->user_cb; 5394 parser->user_cb = parser->patched_cb; 5395 rc = gaudi_validate_cb(hdev, parser, true); 5396 parser->user_cb = user_cb; 5397 5398 if (rc) { 5399 hl_cb_put(parser->patched_cb); 5400 goto out; 5401 } 5402 5403 if (patched_cb_size != parser->patched_cb_size) { 5404 dev_err(hdev->dev, "user CB size mismatch\n"); 5405 hl_cb_put(parser->patched_cb); 5406 rc = -EINVAL; 5407 goto out; 5408 } 5409 5410 out: 5411 /* 5412 * Always call cb destroy here because we still have 1 reference 5413 * to it by calling cb_get earlier. After the job will be completed, 5414 * cb_put will release it, but here we want to remove it from the 5415 * idr 5416 */ 5417 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5418 5419 return rc; 5420 } 5421 5422 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5423 struct hl_cs_parser *parser) 5424 { 5425 u64 handle; 5426 int rc; 5427 5428 rc = gaudi_validate_cb(hdev, parser, false); 5429 5430 if (rc) 5431 goto free_userptr; 5432 5433 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5434 parser->patched_cb_size, false, false, 5435 &handle); 5436 if (rc) { 5437 dev_err(hdev->dev, 5438 "Failed to allocate patched CB for DMA CS %d\n", rc); 5439 goto free_userptr; 5440 } 5441 5442 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5443 /* hl_cb_get should never fail here */ 5444 if (!parser->patched_cb) { 5445 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5446 rc = -EFAULT; 5447 goto out; 5448 } 5449 5450 rc = gaudi_patch_cb(hdev, parser); 5451 5452 if (rc) 5453 hl_cb_put(parser->patched_cb); 5454 5455 out: 5456 /* 5457 * Always call cb destroy here because we still have 1 reference 5458 * to it by calling cb_get earlier. 
After the job will be completed, 5459 * cb_put will release it, but here we want to remove it from the 5460 * idr 5461 */ 5462 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5463 5464 free_userptr: 5465 if (rc) 5466 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5467 return rc; 5468 } 5469 5470 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5471 struct hl_cs_parser *parser) 5472 { 5473 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5474 struct gaudi_device *gaudi = hdev->asic_specific; 5475 u32 nic_queue_offset, nic_mask_q_id; 5476 5477 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5478 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5479 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5480 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5481 5482 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5483 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5484 return -EINVAL; 5485 } 5486 } 5487 5488 /* For internal queue jobs just check if CB address is valid */ 5489 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5490 parser->user_cb_size, 5491 asic_prop->sram_user_base_address, 5492 asic_prop->sram_end_address)) 5493 return 0; 5494 5495 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5496 parser->user_cb_size, 5497 asic_prop->dram_user_base_address, 5498 asic_prop->dram_end_address)) 5499 return 0; 5500 5501 /* PMMU and HPMMU addresses are equal, check only one of them */ 5502 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5503 parser->user_cb_size, 5504 asic_prop->pmmu.start_addr, 5505 asic_prop->pmmu.end_addr)) 5506 return 0; 5507 5508 dev_err(hdev->dev, 5509 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5510 parser->user_cb, parser->user_cb_size); 5511 5512 return -EFAULT; 5513 } 5514 5515 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5516 { 5517 struct gaudi_device *gaudi = hdev->asic_specific; 5518 5519 if (parser->queue_type == QUEUE_TYPE_INT) 5520 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5521 5522 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5523 return gaudi_parse_cb_mmu(hdev, parser); 5524 else 5525 return gaudi_parse_cb_no_mmu(hdev, parser); 5526 } 5527 5528 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5529 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5530 u32 msi_vec, bool eb) 5531 { 5532 struct packet_msg_prot *cq_pkt; 5533 struct packet_nop *cq_padding; 5534 u64 msi_addr; 5535 u32 tmp; 5536 5537 cq_padding = kernel_address + original_len; 5538 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5539 5540 while ((void *)cq_padding < (void *)cq_pkt) { 5541 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5542 cq_padding++; 5543 } 5544 5545 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5546 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5547 5548 if (eb) 5549 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5550 5551 cq_pkt->ctl = cpu_to_le32(tmp); 5552 cq_pkt->value = cpu_to_le32(cq_val); 5553 cq_pkt->addr = cpu_to_le64(cq_addr); 5554 5555 cq_pkt++; 5556 5557 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5558 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5559 cq_pkt->ctl = cpu_to_le32(tmp); 5560 cq_pkt->value = cpu_to_le32(1); 5561 msi_addr = hdev->pdev ? 
mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4; 5562 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5563 } 5564 5565 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5566 { 5567 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5568 } 5569 5570 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5571 u32 size, u64 val) 5572 { 5573 struct packet_lin_dma *lin_dma_pkt; 5574 struct hl_cs_job *job; 5575 u32 cb_size, ctl, err_cause; 5576 struct hl_cb *cb; 5577 int rc; 5578 5579 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5580 if (!cb) 5581 return -EFAULT; 5582 5583 lin_dma_pkt = cb->kernel_address; 5584 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5585 cb_size = sizeof(*lin_dma_pkt); 5586 5587 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5588 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5589 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5590 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5591 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5592 5593 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5594 lin_dma_pkt->src_addr = cpu_to_le64(val); 5595 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5596 lin_dma_pkt->tsize = cpu_to_le32(size); 5597 5598 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5599 if (!job) { 5600 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5601 rc = -ENOMEM; 5602 goto release_cb; 5603 } 5604 5605 /* Verify DMA is OK */ 5606 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5607 if (err_cause && !hdev->init_done) { 5608 dev_dbg(hdev->dev, 5609 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5610 err_cause); 5611 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5612 } 5613 5614 job->id = 0; 5615 job->user_cb = cb; 5616 atomic_inc(&job->user_cb->cs_cnt); 5617 job->user_cb_size = cb_size; 5618 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5619 job->patched_cb = job->user_cb; 5620 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5621 5622 hl_debugfs_add_job(hdev, job); 5623 5624 rc = gaudi_send_job_on_qman0(hdev, job); 5625 hl_debugfs_remove_job(hdev, job); 5626 kfree(job); 5627 atomic_dec(&cb->cs_cnt); 5628 5629 /* Verify DMA is OK */ 5630 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5631 if (err_cause) { 5632 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5633 rc = -EIO; 5634 if (!hdev->init_done) { 5635 dev_dbg(hdev->dev, 5636 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5637 err_cause); 5638 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5639 } 5640 } 5641 5642 release_cb: 5643 hl_cb_put(cb); 5644 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5645 5646 return rc; 5647 } 5648 5649 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5650 u32 num_regs, u32 val) 5651 { 5652 struct packet_msg_long *pkt; 5653 struct hl_cs_job *job; 5654 u32 cb_size, ctl; 5655 struct hl_cb *cb; 5656 int i, rc; 5657 5658 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5659 5660 if (cb_size > SZ_2M) { 5661 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5662 return -ENOMEM; 5663 } 5664 5665 cb = hl_cb_kernel_create(hdev, cb_size, false); 5666 if (!cb) 5667 return -EFAULT; 5668 5669 pkt = cb->kernel_address; 5670 5671 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5672 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5673 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5674 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5675 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5676 5677 for (i = 0; i < num_regs ; i++, pkt++) { 
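/* One MSG_LONG packet per register: same ctl and value, destination address advances by 4 bytes each iteration */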
5678 pkt->ctl = cpu_to_le32(ctl); 5679 pkt->value = cpu_to_le32(val); 5680 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5681 } 5682 5683 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5684 if (!job) { 5685 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5686 rc = -ENOMEM; 5687 goto release_cb; 5688 } 5689 5690 job->id = 0; 5691 job->user_cb = cb; 5692 atomic_inc(&job->user_cb->cs_cnt); 5693 job->user_cb_size = cb_size; 5694 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5695 job->patched_cb = job->user_cb; 5696 job->job_cb_size = cb_size; 5697 5698 hl_debugfs_add_job(hdev, job); 5699 5700 rc = gaudi_send_job_on_qman0(hdev, job); 5701 hl_debugfs_remove_job(hdev, job); 5702 kfree(job); 5703 atomic_dec(&cb->cs_cnt); 5704 5705 release_cb: 5706 hl_cb_put(cb); 5707 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5708 5709 return rc; 5710 } 5711 5712 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5713 { 5714 u64 base_addr; 5715 u32 num_regs; 5716 int rc; 5717 5718 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5719 num_regs = NUM_OF_SOB_IN_BLOCK; 5720 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5721 if (rc) { 5722 dev_err(hdev->dev, "failed resetting SM registers"); 5723 return -ENOMEM; 5724 } 5725 5726 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5727 num_regs = NUM_OF_SOB_IN_BLOCK; 5728 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5729 if (rc) { 5730 dev_err(hdev->dev, "failed resetting SM registers"); 5731 return -ENOMEM; 5732 } 5733 5734 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5735 num_regs = NUM_OF_SOB_IN_BLOCK; 5736 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5737 if (rc) { 5738 dev_err(hdev->dev, "failed resetting SM registers"); 5739 return -ENOMEM; 5740 } 5741 5742 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5743 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5744 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5745 if (rc) { 5746 dev_err(hdev->dev, "failed resetting SM registers"); 5747 return -ENOMEM; 5748 } 5749 5750 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5751 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5752 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5753 if (rc) { 5754 dev_err(hdev->dev, "failed resetting SM registers"); 5755 return -ENOMEM; 5756 } 5757 5758 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5759 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5760 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5761 if (rc) { 5762 dev_err(hdev->dev, "failed resetting SM registers"); 5763 return -ENOMEM; 5764 } 5765 5766 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5767 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5768 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5769 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5770 if (rc) { 5771 dev_err(hdev->dev, "failed resetting SM registers"); 5772 return -ENOMEM; 5773 } 5774 5775 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5776 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5777 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5778 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5779 if (rc) { 5780 dev_err(hdev->dev, "failed resetting SM registers"); 5781 return -ENOMEM; 5782 } 5783 5784 return 0; 5785 } 5786 5787 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5788 { 5789 u32 sob_delta = 
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5790 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5791 int i; 5792 5793 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5794 u64 sob_addr = CFG_BASE + 5795 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5796 (i * sob_delta); 5797 u32 dma_offset = i * DMA_CORE_OFFSET; 5798 5799 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5800 lower_32_bits(sob_addr)); 5801 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5802 upper_32_bits(sob_addr)); 5803 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5804 5805 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5806 * modified by the user for SRAM reduction 5807 */ 5808 if (i > 1) 5809 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5810 0x00000001); 5811 } 5812 } 5813 5814 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5815 { 5816 u32 qman_offset; 5817 int i; 5818 5819 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5820 qman_offset = i * DMA_QMAN_OFFSET; 5821 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5822 } 5823 5824 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5825 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5826 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5827 } 5828 5829 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5830 qman_offset = i * TPC_QMAN_OFFSET; 5831 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5832 } 5833 5834 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5835 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5836 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5837 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5838 } 5839 } 5840 5841 static int gaudi_restore_user_registers(struct hl_device *hdev) 5842 { 5843 int rc; 5844 5845 rc = gaudi_restore_sm_registers(hdev); 5846 if (rc) 5847 return rc; 5848 5849 gaudi_restore_dma_registers(hdev); 5850 gaudi_restore_qm_registers(hdev); 5851 5852 return 0; 5853 } 5854 5855 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5856 { 5857 return 0; 5858 } 5859 5860 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5861 { 5862 u32 size = hdev->asic_prop.mmu_pgt_size + 5863 hdev->asic_prop.mmu_cache_mng_size; 5864 struct gaudi_device *gaudi = hdev->asic_specific; 5865 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5866 5867 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5868 return 0; 5869 5870 return gaudi_memset_device_memory(hdev, addr, size, 0); 5871 } 5872 5873 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5874 { 5875 5876 } 5877 5878 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5879 u32 size_to_dma, dma_addr_t dma_addr) 5880 { 5881 u32 err_cause, val; 5882 u64 dma_offset; 5883 int rc; 5884 5885 dma_offset = dma_id * DMA_CORE_OFFSET; 5886 5887 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5888 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5889 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5890 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5891 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5892 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5893 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5894 5895 rc = hl_poll_timeout( 5896 hdev, 5897 mmDMA0_CORE_STS0 + dma_offset, 5898 val, 5899 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5900 0, 5901 1000000); 5902 5903 if (rc) { 5904 dev_err(hdev->dev, 5905 "DMA %d timed-out during reading of 0x%llx\n", 5906 dma_id, addr); 5907 return -EIO; 5908 } 5909 5910 /* Verify DMA is OK */ 5911 err_cause = 
RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5912 if (err_cause) { 5913 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5914 dev_dbg(hdev->dev, 5915 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5916 err_cause); 5917 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5918 5919 return -EIO; 5920 } 5921 5922 return 0; 5923 } 5924 5925 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5926 void *blob_addr) 5927 { 5928 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 5929 u32 qm_glbl_sts0, qm_cgm_sts; 5930 u64 dma_offset, qm_offset; 5931 dma_addr_t dma_addr; 5932 void *kernel_addr; 5933 bool is_eng_idle; 5934 int rc = 0, dma_id; 5935 5936 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 5937 5938 if (!kernel_addr) 5939 return -ENOMEM; 5940 5941 hdev->asic_funcs->hw_queues_lock(hdev); 5942 5943 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 5944 dma_offset = dma_id * DMA_CORE_OFFSET; 5945 qm_offset = dma_id * DMA_QMAN_OFFSET; 5946 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5947 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5948 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5949 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5950 IS_DMA_IDLE(dma_core_sts0); 5951 5952 if (!is_eng_idle) { 5953 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 5954 dma_offset = dma_id * DMA_CORE_OFFSET; 5955 qm_offset = dma_id * DMA_QMAN_OFFSET; 5956 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5957 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5958 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5959 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5960 IS_DMA_IDLE(dma_core_sts0); 5961 5962 if (!is_eng_idle) { 5963 dev_err_ratelimited(hdev->dev, 5964 "Can't read via DMA because it is BUSY\n"); 5965 rc = -EAGAIN; 5966 goto out; 5967 } 5968 } 5969 5970 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 5971 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 5972 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 5973 5974 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 5975 * using the compute ctx ASID, if exists. If not, use the kernel ctx 5976 * ASID 5977 */ 5978 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 5979 5980 /* Verify DMA is OK */ 5981 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5982 if (err_cause) { 5983 dev_dbg(hdev->dev, 5984 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5985 err_cause); 5986 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5987 } 5988 5989 pos = 0; 5990 size_left = size; 5991 size_to_dma = SZ_2M; 5992 5993 while (size_left > 0) { 5994 5995 if (size_left < SZ_2M) 5996 size_to_dma = size_left; 5997 5998 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 5999 dma_addr); 6000 if (rc) 6001 break; 6002 6003 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6004 6005 if (size_left <= SZ_2M) 6006 break; 6007 6008 pos += SZ_2M; 6009 addr += SZ_2M; 6010 size_left -= SZ_2M; 6011 } 6012 6013 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6014 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6015 * ASID 6016 */ 6017 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6018 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6019 6020 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6021 6022 out: 6023 hdev->asic_funcs->hw_queues_unlock(hdev); 6024 6025 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6026 6027 return rc; 6028 } 6029 6030 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6031 { 6032 struct gaudi_device *gaudi = hdev->asic_specific; 6033 6034 if (hdev->reset_info.hard_reset_pending) 6035 return U64_MAX; 6036 6037 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6038 (addr - gaudi->hbm_bar_cur_addr)); 6039 } 6040 6041 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6042 { 6043 struct gaudi_device *gaudi = hdev->asic_specific; 6044 6045 if (hdev->reset_info.hard_reset_pending) 6046 return; 6047 6048 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6049 (addr - gaudi->hbm_bar_cur_addr)); 6050 } 6051 6052 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6053 { 6054 /* mask to zero the MMBP and ASID bits */ 6055 WREG32_AND(reg, ~0x7FF); 6056 WREG32_OR(reg, asid); 6057 } 6058 6059 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6060 { 6061 struct gaudi_device *gaudi = hdev->asic_specific; 6062 6063 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6064 return; 6065 6066 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6067 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6068 return; 6069 } 6070 6071 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6072 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6073 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6074 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6075 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6076 6077 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6078 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6079 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6080 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6081 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6082 6083 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6084 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6085 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6086 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6087 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6088 6089 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6090 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6091 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6092 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6093 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6094 6095 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6096 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6097 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6098 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6099 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6100 6101 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6102 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6103 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6104 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6105 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6106 6107 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6108 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6109 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6110 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6111 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6112 6113 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6114 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6115 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6116 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6117 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6118 6119 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6120 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6121 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6122 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6123 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6124 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6125 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6126 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6127 6128 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6129 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6130 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6131 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6132 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6133 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6134 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6135 6136 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6137 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6138 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6139 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6140 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6141 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6142 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6143 6144 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6145 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6146 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6147 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6148 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6149 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6150 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6151 6152 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6153 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6155 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6156 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6158 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6159 6160 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6161 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6162 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6167 6168 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6169 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6173 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6174 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6175 6176 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6177 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6179 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6182 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6183 6184 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6185 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6186 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6191 6192 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6197 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6198 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6200 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6202 6203 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6206 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6207 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6212 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6213 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6214 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6215 6216 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6217 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6218 asid); 6219 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6220 asid); 6221 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6222 asid); 6223 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6224 asid); 6225 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6226 asid); 6227 } 6228 6229 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6230 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6231 asid); 6232 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6233 asid); 6234 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6235 asid); 6236 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6237 asid); 6238 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6239 asid); 6240 } 6241 6242 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6243 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6244 asid); 6245 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6246 asid); 6247 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6248 asid); 6249 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6250 asid); 6251 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6252 asid); 6253 } 6254 6255 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6256 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6257 asid); 6258 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6259 asid); 6260 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6261 asid); 6262 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6263 asid); 6264 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6265 asid); 6266 } 6267 6268 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6269 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6270 asid); 6271 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6272 asid); 6273 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6274 asid); 6275 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6276 asid); 6277 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6278 asid); 6279 } 6280 6281 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6282 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6283 asid); 6284 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6285 asid); 6286 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6287 asid); 6288 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6289 asid); 6290 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6291 asid); 6292 } 6293 6294 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6295 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6296 asid); 6297 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6298 asid); 6299 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6300 asid); 6301 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6302 asid); 6303 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6304 asid); 6305 } 6306 6307 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6308 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6309 asid); 6310 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6311 asid); 6312 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6313 asid); 6314 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6315 asid); 6316 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6317 asid); 6318 } 6319 6320 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6321 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6322 asid); 6323 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6324 asid); 6325 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6326 asid); 6327 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6328 asid); 6329 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6330 asid); 6331 } 6332 6333 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6334 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6335 asid); 6336 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6337 asid); 6338 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6339 asid); 6340 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6341 asid); 6342 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6343 asid); 6344 } 6345 6346 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6347 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6348 } 6349 6350 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6351 struct hl_cs_job *job) 6352 { 6353 struct packet_msg_prot *fence_pkt; 6354 u32 *fence_ptr; 6355 dma_addr_t fence_dma_addr; 6356 struct hl_cb *cb; 6357 u32 tmp, timeout, dma_offset; 6358 int rc; 6359 6360 if (hdev->pldm) 6361 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6362 else 6363 timeout = HL_DEVICE_TIMEOUT_USEC; 6364 6365 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6366 if (!fence_ptr) { 6367 dev_err(hdev->dev, 6368 "Failed to allocate fence memory for QMAN0\n"); 6369 return -ENOMEM; 6370 } 6371 6372 cb = job->patched_cb; 6373 6374 fence_pkt = cb->kernel_address + 6375 job->job_cb_size - sizeof(struct packet_msg_prot); 6376 6377 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6378 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6379 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6380 6381 fence_pkt->ctl = cpu_to_le32(tmp); 6382 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6383 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6384 6385 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6386 6387 WREG32(mmDMA0_CORE_PROT + dma_offset, 6388 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6389 6390 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6391 job->job_cb_size, cb->bus_address); 6392 if (rc) { 6393 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6394 goto free_fence_ptr; 6395 } 6396 6397 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6398 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6399 timeout, true); 6400 6401 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6402 6403 if (rc == -ETIMEDOUT) { 6404 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6405 goto free_fence_ptr; 6406 } 6407 6408 free_fence_ptr: 6409 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6410 6411 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6412 return rc; 6413 } 6414 6415 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6416 { 6417 if (event_type >= GAUDI_EVENT_SIZE) 6418 goto event_not_supported; 6419 6420 if (!gaudi_irq_map_table[event_type].valid) 6421 goto 
event_not_supported; 6422 6423 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6424 6425 return; 6426 6427 event_not_supported: 6428 snprintf(desc, size, "N/A"); 6429 } 6430 6431 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6432 bool is_write, u16 *engine_id_1, 6433 u16 *engine_id_2) 6434 { 6435 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6436 6437 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6438 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6439 6440 switch (x_y) { 6441 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6442 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6443 dma_id[0] = 0; 6444 dma_id[1] = 2; 6445 break; 6446 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6447 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6448 dma_id[0] = 1; 6449 dma_id[1] = 3; 6450 break; 6451 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6452 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6453 dma_id[0] = 4; 6454 dma_id[1] = 6; 6455 break; 6456 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6457 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6458 dma_id[0] = 5; 6459 dma_id[1] = 7; 6460 break; 6461 default: 6462 goto unknown_initiator; 6463 } 6464 6465 for (i = 0 ; i < 2 ; i++) { 6466 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6467 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6468 } 6469 6470 switch (x_y) { 6471 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6472 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6473 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6474 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6475 return "DMA0"; 6476 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6477 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6478 return "DMA2"; 6479 } else { 6480 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6481 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6482 return "DMA0 or DMA2"; 6483 } 6484 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6485 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6486 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6487 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6488 return "DMA1"; 6489 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6490 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6491 return "DMA3"; 6492 } else { 6493 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6494 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6495 return "DMA1 or DMA3"; 6496 } 6497 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6498 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6499 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6500 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6501 return "DMA4"; 6502 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6503 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6504 return "DMA6"; 6505 } else { 6506 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6507 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6508 return "DMA4 or DMA6"; 6509 } 6510 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6511 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6512 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6513 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6514 return "DMA5"; 6515 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6516 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6517 return "DMA7"; 6518 } else { 6519 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6520 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6521 return "DMA5 or DMA7"; 6522 } 6523 } 6524 6525 unknown_initiator: 6526 return "unknown initiator"; 6527 } 6528 6529 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6530 u16 *engine_id_1, u16 *engine_id_2) 6531 { 6532 u32 val, x_y, axi_id; 6533 6534 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6535 RREG32(mmMMU_UP_RAZWI_READ_ID); 6536 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6537 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6538 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6539 RAZWI_INITIATOR_AXI_ID_SHIFT); 6540 6541 switch (x_y) { 6542 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6543 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6544 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6545 return "TPC0"; 6546 } 6547 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6548 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6549 return "NIC0"; 6550 } 6551 break; 6552 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6553 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6554 return "TPC1"; 6555 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6556 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6557 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6558 return "MME0"; 6559 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6560 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6561 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6562 return "MME1"; 6563 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6564 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6565 return "TPC2"; 6566 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6567 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6568 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6569 return "TPC3"; 6570 } 6571 /* PCI, CPU or PSOC does not have engine id*/ 6572 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6573 return "PCI"; 6574 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6575 return "CPU"; 6576 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6577 return "PSOC"; 6578 break; 6579 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6580 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6581 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6582 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6583 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6584 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6585 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6586 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6587 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6588 engine_id_1, engine_id_2); 6589 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6590 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6591 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6592 return "TPC4"; 6593 } 6594 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6595 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6596 return "NIC1"; 6597 } 6598 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6599 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6600 return "NIC2"; 6601 } 6602 break; 6603 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6604 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6605 return "TPC5"; 6606 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6607 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6608 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6609 return "MME2"; 6610 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6611 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6612 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6613 return "MME3"; 6614 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6615 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6616 return "TPC6"; 6617 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6618 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6619 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6620 return "TPC7"; 6621 } 6622 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6623 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6624 return "NIC4"; 6625 } 6626 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6627 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6628 return "NIC5"; 6629 } 6630 break; 6631 default: 6632 break; 6633 } 6634 6635 dev_err(hdev->dev, 6636 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6637 val, 6638 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6639 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6640 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6641 RAZWI_INITIATOR_AXI_ID_MASK); 6642 6643 return "unknown initiator"; 6644 } 6645 6646 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6647 u16 *engine_id_2, bool *is_read, bool *is_write) 6648 { 6649 6650 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6651 dev_err_ratelimited(hdev->dev, 6652 "RAZWI event caused by illegal write of %s\n", 6653 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6654 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6655 *is_write = true; 6656 } 6657 6658 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6659 dev_err_ratelimited(hdev->dev, 6660 "RAZWI event caused by illegal read of %s\n", 6661 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6662 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6663 *is_read = true; 6664 } 6665 } 6666 6667 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6668 { 6669 struct gaudi_device *gaudi = hdev->asic_specific; 6670 u32 val; 6671 6672 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6673 return; 6674 6675 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6676 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6677 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6678 *addr <<= 32; 6679 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6680 6681 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6682 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6683 6684 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6685 } 6686 6687 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6688 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6689 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6690 *addr <<= 32; 6691 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6692 6693 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6694 6695 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6696 } 6697 } 6698 6699 /* 6700 * +-------------------+------------------------------------------------------+ 6701 * | Configuration Reg | Description | 6702 * | Address | | 6703 * +-------------------+------------------------------------------------------+ 6704 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6705 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6706 * | |0xF34 memory wrappers 63:32 | 6707 * | |0xF38 memory wrappers 95:64 | 6708 * | |0xF3C memory wrappers 127:96 | 6709 * +-------------------+------------------------------------------------------+ 6710 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6711 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6712 * | |0xF44 memory wrappers 63:32 | 6713 * | |0xF48 memory wrappers 95:64 | 6714 * | |0xF4C memory wrappers 127:96 | 6715 * +-------------------+------------------------------------------------------+ 6716 */ 6717 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6718 struct ecc_info_extract_params *params, u64 *ecc_address, 6719 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6720 { 6721 u32 i, num_mem_regs, reg, err_bit; 6722 u64 err_addr, err_word = 0; 6723 6724 num_mem_regs = params->num_memories / 32 + 6725 ((params->num_memories % 32) ? 
1 : 0); 6726 6727 if (params->block_address >= CFG_BASE) 6728 params->block_address -= CFG_BASE; 6729 6730 if (params->derr) 6731 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6732 else 6733 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6734 6735 /* Set invalid wrapper index */ 6736 *memory_wrapper_idx = 0xFF; 6737 6738 /* Iterate through memory wrappers, a single bit must be set */ 6739 for (i = 0 ; i < num_mem_regs ; i++) { 6740 err_addr += i * 4; 6741 err_word = RREG32(err_addr); 6742 if (err_word) { 6743 err_bit = __ffs(err_word); 6744 *memory_wrapper_idx = err_bit + (32 * i); 6745 break; 6746 } 6747 } 6748 6749 if (*memory_wrapper_idx == 0xFF) { 6750 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6751 return -EINVAL; 6752 } 6753 6754 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6755 *memory_wrapper_idx); 6756 6757 *ecc_address = 6758 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6759 *ecc_syndrom = 6760 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6761 6762 /* Clear error indication */ 6763 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6764 if (params->derr) 6765 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6766 else 6767 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6768 6769 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6770 6771 return 0; 6772 } 6773 6774 /* 6775 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6776 * 6777 * @idx: the current pi/ci value 6778 * @q_len: the queue length (power of 2) 6779 * 6780 * @return the cyclically decremented index 6781 */ 6782 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6783 { 6784 u32 mask = q_len - 1; 6785 6786 /* 6787 * modular decrement is equivalent to adding (queue_size -1) 6788 * later we take LSBs to make sure the value is in the 6789 * range [0, queue_len - 1] 6790 */ 6791 return (idx + q_len - 1) & mask; 6792 } 6793 6794 /** 6795 * gaudi_handle_sw_config_stream_data - print SW config stream data 6796 * 6797 * @hdev: pointer to the habanalabs device structure 6798 * @stream: the QMAN's stream 6799 * @qman_base: base address of QMAN registers block 6800 * @event_mask: mask of the last events occurred 6801 */ 6802 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6803 u64 qman_base, u64 event_mask) 6804 { 6805 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6806 u32 cq_ptr_lo_off, size; 6807 6808 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6809 6810 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6811 stream * cq_ptr_lo_off; 6812 cq_ptr_hi = cq_ptr_lo + 6813 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6814 cq_tsize = cq_ptr_lo + 6815 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6816 6817 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6818 size = RREG32(cq_tsize); 6819 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6820 stream, cq_ptr, size); 6821 6822 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6823 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6824 hdev->captured_err_info.undef_opcode.cq_size = size; 6825 hdev->captured_err_info.undef_opcode.stream_id = stream; 6826 } 6827 } 6828 6829 /** 6830 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6831 * 6832 * @hdev: pointer to the habanalabs device structure 6833 * @qid_base: first QID of the QMAN (out of 4 streams) 6834 * @stream: the QMAN's stream 6835 * @qman_base: 
base address of QMAN registers block 6836 * @event_mask: mask of the last events occurred 6837 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6838 */ 6839 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6840 u32 stream, u64 qman_base, 6841 u64 event_mask, 6842 bool pr_sw_conf) 6843 { 6844 u32 ci, qm_ci_stream_off, queue_len; 6845 struct hl_hw_queue *q; 6846 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6847 int i; 6848 6849 q = &hdev->kernel_queues[qid_base + stream]; 6850 6851 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6852 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6853 stream * qm_ci_stream_off; 6854 6855 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6856 q->int_queue_len : HL_QUEUE_LENGTH; 6857 6858 hdev->asic_funcs->hw_queues_lock(hdev); 6859 6860 if (pr_sw_conf) 6861 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6862 6863 ci = RREG32(pq_ci); 6864 6865 /* we should start printing from ci - 1 */ 6866 ci = gaudi_queue_idx_dec(ci, queue_len); 6867 memset(addr, 0, sizeof(addr)); 6868 6869 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6870 struct hl_bd *bd; 6871 u32 len; 6872 6873 bd = q->kernel_address; 6874 bd += ci; 6875 6876 len = le32_to_cpu(bd->len); 6877 /* len 0 means uninitialized entry - break */ 6878 if (!len) 6879 break; 6880 6881 addr[i] = le64_to_cpu(bd->ptr); 6882 6883 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6884 stream, ci, addr[i], len); 6885 6886 /* get previous ci, wrap if needed */ 6887 ci = gaudi_queue_idx_dec(ci, queue_len); 6888 } 6889 6890 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6891 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6892 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6893 6894 if (arr_idx == 0) { 6895 undef_opcode->timestamp = ktime_get(); 6896 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6897 } 6898 6899 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6900 undef_opcode->cb_addr_streams_len++; 6901 } 6902 6903 hdev->asic_funcs->hw_queues_unlock(hdev); 6904 } 6905 6906 /** 6907 * handle_qman_data_on_err - extract QMAN data on error 6908 * 6909 * @hdev: pointer to the habanalabs device structure 6910 * @qid_base: first QID of the QMAN (out of 4 streams) 6911 * @stream: the QMAN's stream 6912 * @qman_base: base address of QMAN registers block 6913 * @event_mask: mask of the last events occurred 6914 * 6915 * This function attempts to extract as much data as possible on QMAN error. 6916 * On upper CP print the SW config stream data and last 8 PQEs.
6917 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6918 */ 6919 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6920 u32 stream, u64 qman_base, u64 event_mask) 6921 { 6922 u32 i; 6923 6924 if (stream != QMAN_STREAMS) { 6925 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 6926 qman_base, event_mask, true); 6927 return; 6928 } 6929 6930 /* handle Lower-CP */ 6931 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6932 6933 for (i = 0; i < QMAN_STREAMS; i++) 6934 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 6935 qman_base, event_mask, false); 6936 } 6937 6938 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 6939 const char *qm_name, 6940 u64 qman_base, 6941 u32 qid_base, 6942 u64 *event_mask) 6943 { 6944 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 6945 u64 glbl_sts_addr, arb_err_addr; 6946 char reg_desc[32]; 6947 6948 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 6949 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 6950 6951 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 6952 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 6953 glbl_sts_clr_val = 0; 6954 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 6955 6956 if (!glbl_sts_val) 6957 continue; 6958 6959 if (i == QMAN_STREAMS) 6960 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 6961 else 6962 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 6963 6964 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 6965 if (glbl_sts_val & BIT(j)) { 6966 dev_err_ratelimited(hdev->dev, 6967 "%s %s. err cause: %s\n", 6968 qm_name, reg_desc, 6969 gaudi_qman_error_cause[j]); 6970 glbl_sts_clr_val |= BIT(j); 6971 } 6972 } 6973 /* check for undefined opcode */ 6974 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 6975 hdev->captured_err_info.undef_opcode.write_enable) { 6976 memset(&hdev->captured_err_info.undef_opcode, 0, 6977 sizeof(hdev->captured_err_info.undef_opcode)); 6978 6979 hdev->captured_err_info.undef_opcode.write_enable = false; 6980 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 6981 } 6982 6983 /* Write 1 clear errors */ 6984 if (!hdev->stop_on_err) 6985 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 6986 else 6987 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 6988 } 6989 6990 arb_err_val = RREG32(arb_err_addr); 6991 6992 if (!arb_err_val) 6993 return; 6994 6995 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 6996 if (arb_err_val & BIT(j)) { 6997 dev_err_ratelimited(hdev->dev, 6998 "%s ARB_ERR. 
err cause: %s\n", 6999 qm_name, 7000 gaudi_qman_arb_error_cause[j]); 7001 } 7002 } 7003 } 7004 7005 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7006 struct hl_eq_sm_sei_data *sei_data) 7007 { 7008 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7009 7010 /* Flip the bits as the enum is ordered in the opposite way */ 7011 index = (index ^ 0x3) & 0x3; 7012 7013 switch (sei_data->sei_cause) { 7014 case SM_SEI_SO_OVERFLOW: 7015 dev_err_ratelimited(hdev->dev, 7016 "%s SEI Error: SOB Group %u overflow/underflow", 7017 gaudi_sync_manager_names[index], 7018 le32_to_cpu(sei_data->sei_log)); 7019 break; 7020 case SM_SEI_LBW_4B_UNALIGNED: 7021 dev_err_ratelimited(hdev->dev, 7022 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7023 gaudi_sync_manager_names[index], 7024 le32_to_cpu(sei_data->sei_log)); 7025 break; 7026 case SM_SEI_AXI_RESPONSE_ERR: 7027 dev_err_ratelimited(hdev->dev, 7028 "%s SEI Error: AXI ID %u response error", 7029 gaudi_sync_manager_names[index], 7030 le32_to_cpu(sei_data->sei_log)); 7031 break; 7032 default: 7033 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7034 le32_to_cpu(sei_data->sei_log)); 7035 break; 7036 } 7037 } 7038 7039 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7040 struct hl_eq_ecc_data *ecc_data) 7041 { 7042 struct ecc_info_extract_params params; 7043 u64 ecc_address = 0, ecc_syndrom = 0; 7044 u8 index, memory_wrapper_idx = 0; 7045 bool extract_info_from_fw; 7046 int rc; 7047 7048 if (hdev->asic_prop.fw_security_enabled) { 7049 extract_info_from_fw = true; 7050 goto extract_ecc_info; 7051 } 7052 7053 switch (event_type) { 7054 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7055 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7056 extract_info_from_fw = true; 7057 break; 7058 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7059 index = event_type - GAUDI_EVENT_TPC0_SERR; 7060 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7061 params.num_memories = 90; 7062 params.derr = false; 7063 extract_info_from_fw = false; 7064 break; 7065 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7066 index = event_type - GAUDI_EVENT_TPC0_DERR; 7067 params.block_address = 7068 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7069 params.num_memories = 90; 7070 params.derr = true; 7071 extract_info_from_fw = false; 7072 break; 7073 case GAUDI_EVENT_MME0_ACC_SERR: 7074 case GAUDI_EVENT_MME1_ACC_SERR: 7075 case GAUDI_EVENT_MME2_ACC_SERR: 7076 case GAUDI_EVENT_MME3_ACC_SERR: 7077 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7078 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7079 params.num_memories = 128; 7080 params.derr = false; 7081 extract_info_from_fw = false; 7082 break; 7083 case GAUDI_EVENT_MME0_ACC_DERR: 7084 case GAUDI_EVENT_MME1_ACC_DERR: 7085 case GAUDI_EVENT_MME2_ACC_DERR: 7086 case GAUDI_EVENT_MME3_ACC_DERR: 7087 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7088 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7089 params.num_memories = 128; 7090 params.derr = true; 7091 extract_info_from_fw = false; 7092 break; 7093 case GAUDI_EVENT_MME0_SBAB_SERR: 7094 case GAUDI_EVENT_MME1_SBAB_SERR: 7095 case GAUDI_EVENT_MME2_SBAB_SERR: 7096 case GAUDI_EVENT_MME3_SBAB_SERR: 7097 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7098 params.block_address = 7099 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7100 params.num_memories = 33; 7101 params.derr = false; 7102 extract_info_from_fw = false; 7103 break; 7104 case GAUDI_EVENT_MME0_SBAB_DERR: 7105 case GAUDI_EVENT_MME1_SBAB_DERR: 7106 case GAUDI_EVENT_MME2_SBAB_DERR: 7107 case GAUDI_EVENT_MME3_SBAB_DERR: 7108 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7109 params.block_address = 7110 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7111 params.num_memories = 33; 7112 params.derr = true; 7113 extract_info_from_fw = false; 7114 break; 7115 default: 7116 return; 7117 } 7118 7119 extract_ecc_info: 7120 if (extract_info_from_fw) { 7121 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7122 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7123 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7124 } else { 7125 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7126 &ecc_syndrom, &memory_wrapper_idx); 7127 if (rc) 7128 return; 7129 } 7130 7131 dev_err(hdev->dev, 7132 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7133 ecc_address, ecc_syndrom, memory_wrapper_idx); 7134 } 7135 7136 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7137 { 7138 u64 qman_base; 7139 char desc[32]; 7140 u32 qid_base; 7141 u8 index; 7142 7143 switch (event_type) { 7144 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7145 index = event_type - GAUDI_EVENT_TPC0_QM; 7146 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7147 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7148 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7149 break; 7150 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7151 if (event_type == GAUDI_EVENT_MME0_QM) { 7152 index = 0; 7153 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7154 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7155 index = 2; 7156 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7157 } 7158 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7159 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7160 break; 7161 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7162 index = event_type - GAUDI_EVENT_DMA0_QM; 7163 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7164 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7165 if (index > 1) 7166 qid_base++; 7167 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7168 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7169 break; 7170 case GAUDI_EVENT_NIC0_QM0: 7171 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7172 qman_base = mmNIC0_QM0_BASE; 7173 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7174 break; 7175 case GAUDI_EVENT_NIC0_QM1: 7176 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7177 qman_base = mmNIC0_QM1_BASE; 7178 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7179 break; 7180 case GAUDI_EVENT_NIC1_QM0: 7181 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7182 qman_base = mmNIC1_QM0_BASE; 7183 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7184 break; 7185 case GAUDI_EVENT_NIC1_QM1: 7186 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7187 qman_base = mmNIC1_QM1_BASE; 7188 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7189 break; 7190 case GAUDI_EVENT_NIC2_QM0: 7191 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7192 qman_base = mmNIC2_QM0_BASE; 7193 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7194 break; 7195 case GAUDI_EVENT_NIC2_QM1: 7196 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7197 qman_base = mmNIC2_QM1_BASE; 7198 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7199 break; 7200 case GAUDI_EVENT_NIC3_QM0: 7201 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7202 qman_base = mmNIC3_QM0_BASE; 7203 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7204 break; 7205 case GAUDI_EVENT_NIC3_QM1: 7206 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7207 qman_base = mmNIC3_QM1_BASE; 7208 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7209 break; 7210 case GAUDI_EVENT_NIC4_QM0: 7211 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7212 qman_base = mmNIC4_QM0_BASE; 7213 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7214 break; 7215 case GAUDI_EVENT_NIC4_QM1: 7216 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7217 qman_base = mmNIC4_QM1_BASE; 7218 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7219 break; 7220 default: 7221 return; 7222 } 7223 7224 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7225 } 7226 7227 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7228 bool check_razwi, u64 *event_mask) 7229 { 7230 bool is_read = false, is_write = false; 7231 u16 engine_id[2], num_of_razwi_eng = 0; 7232 char desc[64] = ""; 7233 u64 razwi_addr = 0; 7234 u8 razwi_flags = 0; 7235 7236 /* 7237 * Init engine id by default as not valid and only if razwi initiated from engine with 7238 * engine id it will get valid value. 
7239 */ 7240 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7241 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7242 7243 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7244 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7245 event_type, desc); 7246 7247 if (check_razwi) { 7248 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7249 &is_write); 7250 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7251 7252 if (is_read) 7253 razwi_flags |= HL_RAZWI_READ; 7254 if (is_write) 7255 razwi_flags |= HL_RAZWI_WRITE; 7256 7257 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7258 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7259 num_of_razwi_eng = 2; 7260 else 7261 num_of_razwi_eng = 1; 7262 } 7263 7264 if (razwi_flags) 7265 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7266 razwi_flags, event_mask); 7267 } 7268 } 7269 7270 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7271 struct cpucp_pkt_sync_err *sync_err) 7272 { 7273 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7274 7275 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7276 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7277 } 7278 7279 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7280 struct hl_eq_fw_alive *fw_alive) 7281 { 7282 dev_err(hdev->dev, 7283 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7284 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7285 le32_to_cpu(fw_alive->process_id), 7286 le32_to_cpu(fw_alive->thread_id), 7287 le64_to_cpu(fw_alive->uptime_seconds)); 7288 } 7289 7290 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7291 void *data) 7292 { 7293 char desc[64] = "", *type; 7294 struct eq_nic_sei_event *eq_nic_sei = data; 7295 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7296 7297 switch (eq_nic_sei->axi_error_cause) { 7298 case RXB: 7299 type = "RXB"; 7300 break; 7301 case RXE: 7302 type = "RXE"; 7303 break; 7304 case TXS: 7305 type = "TXS"; 7306 break; 7307 case TXE: 7308 type = "TXE"; 7309 break; 7310 case QPC_RESP: 7311 type = "QPC_RESP"; 7312 break; 7313 case NON_AXI_ERR: 7314 type = "NON_AXI_ERR"; 7315 break; 7316 case TMR: 7317 type = "TMR"; 7318 break; 7319 default: 7320 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7321 eq_nic_sei->axi_error_cause); 7322 type = "N/A"; 7323 break; 7324 } 7325 7326 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7327 eq_nic_sei->id); 7328 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7329 event_type, desc); 7330 } 7331 7332 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7333 { 7334 /* GAUDI doesn't support any reset except hard-reset */ 7335 return -EPERM; 7336 } 7337 7338 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7339 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7340 { 7341 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7342 int rc = 0; 7343 7344 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7345 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7346 if (!hbm_ecc_data) { 7347 dev_err(hdev->dev, "No FW ECC data"); 7348 return 0; 7349 } 7350 7351 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7352 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7353 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7354 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7355 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7356 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7357 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7358 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7359 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7360 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7361 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7362 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7363 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7364 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7365 7366 dev_err(hdev->dev, 7367 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7368 device, ch, wr_par, rd_par, ca_par, serr, derr); 7369 dev_err(hdev->dev, 7370 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7371 device, ch, hbm_ecc_data->first_addr, type, 7372 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7373 hbm_ecc_data->dec_cnt); 7374 return 0; 7375 } 7376 7377 if (hdev->asic_prop.fw_security_enabled) { 7378 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7379 return 0; 7380 } 7381 7382 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7383 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7384 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7385 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7386 if (val) { 7387 rc = -EIO; 7388 dev_err(hdev->dev, 7389 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7390 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7391 (val >> 2) & 0x1, (val >> 3) & 0x1, 7392 (val >> 4) & 0x1); 7393 7394 val2 = RREG32(base + ch * 0x1000 + 0x060); 7395 dev_err(hdev->dev, 7396 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7397 device, ch * 2, 7398 RREG32(base + ch * 0x1000 + 0x064), 7399 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7400 (val2 & 0xFF0000) >> 16, 7401 (val2 & 0xFF000000) >> 24); 7402 } 7403 7404 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7405 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7406 if (val) { 7407 rc = -EIO; 7408 dev_err(hdev->dev, 7409 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7410 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7411 (val >> 2) & 0x1, (val >> 3) & 0x1, 7412 (val >> 4) & 0x1); 7413 7414 val2 = RREG32(base + ch * 0x1000 + 0x070); 7415 dev_err(hdev->dev, 7416 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7417 device, ch * 2 + 1, 7418 RREG32(base + ch * 0x1000 + 0x074), 7419 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7420 (val2 & 0xFF0000) >> 16, 7421 (val2 & 0xFF000000) >> 24); 7422 } 7423 7424 /* Clear interrupts */ 7425 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7426 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7427 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7428 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7429 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7430 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7431 } 7432 7433 val = RREG32(base + 0x8F30); 7434 val2 = RREG32(base + 0x8F34); 7435 if (val | val2) { 7436 rc = -EIO; 7437 dev_err(hdev->dev, 7438 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7439 device, val, val2); 7440 } 7441 val = RREG32(base + 0x8F40); 7442 val2 = RREG32(base + 0x8F44); 7443 if (val | val2) { 7444 rc = -EIO; 7445 dev_err(hdev->dev, 7446 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7447 device, val, val2); 7448 } 7449 7450 return rc; 7451 } 
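/**
 * gaudi_hbm_event_to_dev - map an HBM SPI event to its HBM device index
 *
 * @hbm_event_type: the GAUDI_EVENT_HBMx_SPI_y event that was received
 *
 * Each HBM device raises two SPI events (SPI_0 and SPI_1) and both map to
 * the same device index (0-3), which callers then use to address the HBM
 * config block (see gaudi_hbm_read_interrupts()). The default branch should
 * never be taken because only HBM SPI events are passed here; it falls back
 * to device 0.
 */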
7452 7453 static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7454 { 7455 switch (hbm_event_type) { 7456 case GAUDI_EVENT_HBM0_SPI_0: 7457 case GAUDI_EVENT_HBM0_SPI_1: 7458 return 0; 7459 case GAUDI_EVENT_HBM1_SPI_0: 7460 case GAUDI_EVENT_HBM1_SPI_1: 7461 return 1; 7462 case GAUDI_EVENT_HBM2_SPI_0: 7463 case GAUDI_EVENT_HBM2_SPI_1: 7464 return 2; 7465 case GAUDI_EVENT_HBM3_SPI_0: 7466 case GAUDI_EVENT_HBM3_SPI_1: 7467 return 3; 7468 default: 7469 break; 7470 } 7471 7472 /* Should never happen */ 7473 return 0; 7474 } 7475 7476 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7477 char *interrupt_name) 7478 { 7479 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7480 bool soft_reset_required = false; 7481 7482 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7483 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7484 7485 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7486 if (tpc_interrupts_cause & BIT(i)) { 7487 dev_err_ratelimited(hdev->dev, 7488 "TPC%d_%s interrupt cause: %s\n", 7489 tpc_id, interrupt_name, 7490 gaudi_tpc_interrupts_cause[i]); 7491 /* If this is a QM error, we need to soft-reset */ 7492 if (i == 15) 7493 soft_reset_required = true; 7494 } 7495 7496 /* Clear interrupts */ 7497 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7498 7499 return soft_reset_required; 7500 } 7501 7502 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7503 { 7504 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7505 } 7506 7507 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7508 { 7509 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7510 } 7511 7512 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7513 { 7514 ktime_t zero_time = ktime_set(0, 0); 7515 7516 mutex_lock(&hdev->clk_throttling.lock); 7517 7518 switch (event_type) { 7519 case GAUDI_EVENT_FIX_POWER_ENV_S: 7520 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7521 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7522 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7523 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7524 dev_info_ratelimited(hdev->dev, 7525 "Clock throttling due to power consumption\n"); 7526 break; 7527 7528 case GAUDI_EVENT_FIX_POWER_ENV_E: 7529 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7530 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7531 dev_info_ratelimited(hdev->dev, 7532 "Power envelope is safe, back to optimal clock\n"); 7533 break; 7534 7535 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7536 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7537 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7538 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7539 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7540 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7541 dev_info_ratelimited(hdev->dev, 7542 "Clock throttling due to overheating\n"); 7543 break; 7544 7545 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7546 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7547 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7548 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7549 dev_info_ratelimited(hdev->dev, 7550 "Thermal envelope is safe, back to optimal clock\n"); 7551 break; 7552 7553 default: 7554
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7555 event_type); 7556 break; 7557 } 7558 7559 mutex_unlock(&hdev->clk_throttling.lock); 7560 } 7561 7562 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7563 { 7564 struct gaudi_device *gaudi = hdev->asic_specific; 7565 struct hl_info_fw_err_info fw_err_info; 7566 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7567 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7568 u32 fw_fatal_err_flag = 0, flags = 0; 7569 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7570 >> EQ_CTL_EVENT_TYPE_SHIFT); 7571 bool reset_required, reset_direct = false; 7572 u8 cause; 7573 int rc; 7574 7575 if (event_type >= GAUDI_EVENT_SIZE) { 7576 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7577 event_type, GAUDI_EVENT_SIZE - 1); 7578 return; 7579 } 7580 7581 gaudi->events_stat[event_type]++; 7582 gaudi->events_stat_aggregate[event_type]++; 7583 7584 switch (event_type) { 7585 case GAUDI_EVENT_PCIE_CORE_DERR: 7586 case GAUDI_EVENT_PCIE_IF_DERR: 7587 case GAUDI_EVENT_PCIE_PHY_DERR: 7588 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7589 case GAUDI_EVENT_MME0_ACC_DERR: 7590 case GAUDI_EVENT_MME0_SBAB_DERR: 7591 case GAUDI_EVENT_MME1_ACC_DERR: 7592 case GAUDI_EVENT_MME1_SBAB_DERR: 7593 case GAUDI_EVENT_MME2_ACC_DERR: 7594 case GAUDI_EVENT_MME2_SBAB_DERR: 7595 case GAUDI_EVENT_MME3_ACC_DERR: 7596 case GAUDI_EVENT_MME3_SBAB_DERR: 7597 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7598 fallthrough; 7599 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7600 case GAUDI_EVENT_PSOC_MEM_DERR: 7601 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7602 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7603 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7604 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7605 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7606 case GAUDI_EVENT_MMU_DERR: 7607 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7608 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7609 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7610 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7611 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7612 goto reset_device; 7613 7614 case GAUDI_EVENT_GIC500: 7615 case GAUDI_EVENT_AXI_ECC: 7616 case GAUDI_EVENT_L2_RAM_ECC: 7617 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7618 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7619 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7620 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7621 goto reset_device; 7622 7623 case GAUDI_EVENT_HBM0_SPI_0: 7624 case GAUDI_EVENT_HBM1_SPI_0: 7625 case GAUDI_EVENT_HBM2_SPI_0: 7626 case GAUDI_EVENT_HBM3_SPI_0: 7627 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7628 gaudi_hbm_read_interrupts(hdev, 7629 gaudi_hbm_event_to_dev(event_type), 7630 &eq_entry->hbm_ecc_data); 7631 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7632 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7633 goto reset_device; 7634 7635 case GAUDI_EVENT_HBM0_SPI_1: 7636 case GAUDI_EVENT_HBM1_SPI_1: 7637 case GAUDI_EVENT_HBM2_SPI_1: 7638 case GAUDI_EVENT_HBM3_SPI_1: 7639 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7640 gaudi_hbm_read_interrupts(hdev, 7641 gaudi_hbm_event_to_dev(event_type), 7642 &eq_entry->hbm_ecc_data); 7643 hl_fw_unmask_irq(hdev, event_type); 7644 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7645 break; 7646 7647 case GAUDI_EVENT_TPC0_DEC: 7648 case GAUDI_EVENT_TPC1_DEC: 7649 case GAUDI_EVENT_TPC2_DEC: 7650 case GAUDI_EVENT_TPC3_DEC: 7651 case GAUDI_EVENT_TPC4_DEC: 7652 case GAUDI_EVENT_TPC5_DEC: 7653 case GAUDI_EVENT_TPC6_DEC: 7654 case GAUDI_EVENT_TPC7_DEC: 7655 /* In TPC DEC event, notify on TPC assertion. While there isn't 7656 * a specific event for assertion yet, the FW generates TPC DEC event. 7657 * The SW upper layer will inspect an internal mapped area to indicate 7658 * if the event is a TPC Assertion or a "real" TPC DEC. 7659 */ 7660 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7661 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7662 reset_required = gaudi_tpc_read_interrupts(hdev, 7663 tpc_dec_event_to_tpc_id(event_type), 7664 "AXI_SLV_DEC_Error"); 7665 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7666 if (reset_required) { 7667 dev_err(hdev->dev, "reset required due to %s\n", 7668 gaudi_irq_map_table[event_type].name); 7669 7670 reset_direct = true; 7671 goto reset_device; 7672 } else { 7673 hl_fw_unmask_irq(hdev, event_type); 7674 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7675 } 7676 break; 7677 7678 case GAUDI_EVENT_TPC0_KRN_ERR: 7679 case GAUDI_EVENT_TPC1_KRN_ERR: 7680 case GAUDI_EVENT_TPC2_KRN_ERR: 7681 case GAUDI_EVENT_TPC3_KRN_ERR: 7682 case GAUDI_EVENT_TPC4_KRN_ERR: 7683 case GAUDI_EVENT_TPC5_KRN_ERR: 7684 case GAUDI_EVENT_TPC6_KRN_ERR: 7685 case GAUDI_EVENT_TPC7_KRN_ERR: 7686 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7687 reset_required = gaudi_tpc_read_interrupts(hdev, 7688 tpc_krn_event_to_tpc_id(event_type), 7689 "KRN_ERR"); 7690 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7691 if (reset_required) { 7692 dev_err(hdev->dev, "reset required due to %s\n", 7693 gaudi_irq_map_table[event_type].name); 7694 7695 reset_direct = true; 7696 goto reset_device; 7697 } else { 7698 hl_fw_unmask_irq(hdev, event_type); 7699 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7700 } 7701 break; 7702 7703 case GAUDI_EVENT_PCIE_CORE_SERR: 7704 case GAUDI_EVENT_PCIE_IF_SERR: 7705 case GAUDI_EVENT_PCIE_PHY_SERR: 7706 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7707 case GAUDI_EVENT_MME0_ACC_SERR: 7708 case GAUDI_EVENT_MME0_SBAB_SERR: 7709 case GAUDI_EVENT_MME1_ACC_SERR: 7710 case GAUDI_EVENT_MME1_SBAB_SERR: 7711 case GAUDI_EVENT_MME2_ACC_SERR: 7712 case GAUDI_EVENT_MME2_SBAB_SERR: 7713 case GAUDI_EVENT_MME3_ACC_SERR: 7714 case GAUDI_EVENT_MME3_SBAB_SERR: 7715 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7716 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7717 case GAUDI_EVENT_PSOC_MEM_SERR: 7718 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7719 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7720 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7721 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7722 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7723 fallthrough; 7724 case GAUDI_EVENT_MMU_SERR: 7725 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7726 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7727 hl_fw_unmask_irq(hdev, event_type); 7728 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7729 break; 7730 7731 case GAUDI_EVENT_PCIE_DEC: 7732 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7733 case GAUDI_EVENT_PSOC_AXI_DEC: 7734 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7735 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7736 hl_fw_unmask_irq(hdev, event_type); 7737 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7738 break; 7739 7740 case GAUDI_EVENT_MMU_PAGE_FAULT: 7741 case GAUDI_EVENT_MMU_WR_PERM: 7742 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7743 hl_fw_unmask_irq(hdev, event_type); 7744 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7745 break; 7746 7747 case GAUDI_EVENT_MME0_WBC_RSP: 7748 case GAUDI_EVENT_MME0_SBAB0_RSP: 7749 case GAUDI_EVENT_MME1_WBC_RSP: 7750 case GAUDI_EVENT_MME1_SBAB0_RSP: 7751 case GAUDI_EVENT_MME2_WBC_RSP: 7752 case GAUDI_EVENT_MME2_SBAB0_RSP: 7753 case GAUDI_EVENT_MME3_WBC_RSP: 7754 case GAUDI_EVENT_MME3_SBAB0_RSP: 7755 case GAUDI_EVENT_RAZWI_OR_ADC: 7756 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7757 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7758 fallthrough; 7759 case GAUDI_EVENT_NIC0_QM0: 7760 case GAUDI_EVENT_NIC0_QM1: 7761 case GAUDI_EVENT_NIC1_QM0: 7762 case GAUDI_EVENT_NIC1_QM1: 7763 case GAUDI_EVENT_NIC2_QM0: 7764 case GAUDI_EVENT_NIC2_QM1: 7765 case GAUDI_EVENT_NIC3_QM0: 7766 case GAUDI_EVENT_NIC3_QM1: 7767 case GAUDI_EVENT_NIC4_QM0: 7768 case GAUDI_EVENT_NIC4_QM1: 7769 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7770 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7771 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7772 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7773 hl_fw_unmask_irq(hdev, event_type); 7774 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7775 break; 7776 7777 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7778 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7779 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7780 goto reset_device; 7781 7782 case GAUDI_EVENT_TPC0_BMON_SPMU: 7783 case GAUDI_EVENT_TPC1_BMON_SPMU: 7784 case GAUDI_EVENT_TPC2_BMON_SPMU: 7785 case GAUDI_EVENT_TPC3_BMON_SPMU: 7786 case GAUDI_EVENT_TPC4_BMON_SPMU: 7787 case GAUDI_EVENT_TPC5_BMON_SPMU: 7788 case GAUDI_EVENT_TPC6_BMON_SPMU: 7789 case GAUDI_EVENT_TPC7_BMON_SPMU: 7790 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7791 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7792 hl_fw_unmask_irq(hdev, event_type); 7793 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7794 break; 7795 7796 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7797 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7798 hl_fw_unmask_irq(hdev, event_type); 7799 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7800 break; 7801 7802 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7803 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7804 gaudi_print_sm_sei_info(hdev, event_type, 7805 &eq_entry->sm_sei_data); 7806 rc = hl_state_dump(hdev); 7807 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7808 if (rc) 7809 dev_err(hdev->dev, 7810 "Error during system state dump %d\n", rc); 7811 hl_fw_unmask_irq(hdev, event_type); 7812 break; 7813 7814 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7815 break; 7816 7817 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7818 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7819 hl_fw_unmask_irq(hdev, event_type); 7820 break; 7821 7822 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7823 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7824 dev_err(hdev->dev, 7825 "Received high temp H/W interrupt %d (cause %d)\n", 7826 event_type, cause); 7827 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7828 break; 7829 7830 case GAUDI_EVENT_DEV_RESET_REQ: 7831 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7832 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7833 goto reset_device; 7834 7835 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7836 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7837 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7838 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7839 goto reset_device; 7840 7841 case GAUDI_EVENT_FW_ALIVE_S: 7842 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7843 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7844 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7845 fw_err_info.event_id = event_type; 7846 fw_err_info.event_mask = &event_mask; 7847 hl_handle_fw_err(hdev, &fw_err_info); 7848 goto reset_device; 7849 7850 default: 7851 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7852 event_type); 7853 break; 7854 } 7855 7856 if (event_mask) 7857 hl_notifier_event_send_all(hdev, event_mask); 7858 7859 return; 7860 7861 reset_device: 7862 reset_required = true; 7863 7864 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7865 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7866 7867 /* notify on device unavailable while the reset triggered by fw */ 7868 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7869 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7870 } else if (hdev->hard_reset_on_fw_events) { 7871 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7872 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7873 } else { 7874 reset_required = false; 7875 } 7876 7877 if (reset_required) { 7878 /* escalate general hw errors to critical/fatal error */ 7879 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7880 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7881 7882 hl_device_cond_reset(hdev, flags, event_mask); 7883 } else { 7884 hl_fw_unmask_irq(hdev, event_type); 7885 /* Notification on occurred event needs to be sent although reset is not executed */ 7886 if (event_mask) 7887 hl_notifier_event_send_all(hdev, event_mask); 7888 } 7889 } 7890 7891 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7892 { 7893 struct gaudi_device *gaudi = hdev->asic_specific; 7894 7895 if (aggregate) { 7896 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7897 
return gaudi->events_stat_aggregate; 7898 } 7899 7900 *size = (u32) sizeof(gaudi->events_stat); 7901 return gaudi->events_stat; 7902 } 7903 7904 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7905 { 7906 struct gaudi_device *gaudi = hdev->asic_specific; 7907 u32 status, timeout_usec; 7908 int rc; 7909 7910 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7911 hdev->reset_info.hard_reset_pending) 7912 return 0; 7913 7914 if (hdev->pldm) 7915 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7916 else 7917 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7918 7919 /* L0 & L1 invalidation */ 7920 WREG32(mmSTLB_INV_PS, 3); 7921 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7922 WREG32(mmSTLB_INV_PS, 2); 7923 7924 rc = hl_poll_timeout( 7925 hdev, 7926 mmSTLB_INV_PS, 7927 status, 7928 !status, 7929 1000, 7930 timeout_usec); 7931 7932 WREG32(mmSTLB_INV_SET, 0); 7933 7934 return rc; 7935 } 7936 7937 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 7938 bool is_hard, u32 flags, 7939 u32 asid, u64 va, u64 size) 7940 { 7941 /* Treat as invalidate all because there is no range invalidation 7942 * in Gaudi 7943 */ 7944 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 7945 } 7946 7947 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 7948 { 7949 u32 status, timeout_usec; 7950 int rc; 7951 7952 if (hdev->pldm) 7953 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7954 else 7955 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7956 7957 WREG32(MMU_ASID, asid); 7958 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 7959 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 7960 WREG32(MMU_BUSY, 0x80000000); 7961 7962 rc = hl_poll_timeout( 7963 hdev, 7964 MMU_BUSY, 7965 status, 7966 !(status & 0x80000000), 7967 1000, 7968 timeout_usec); 7969 7970 if (rc) { 7971 dev_err(hdev->dev, 7972 "Timeout during MMU hop0 config of asid %d\n", asid); 7973 return rc; 7974 } 7975 7976 return 0; 7977 } 7978 7979 static int gaudi_send_heartbeat(struct hl_device *hdev) 7980 { 7981 struct gaudi_device *gaudi = hdev->asic_specific; 7982 7983 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7984 return 0; 7985 7986 return hl_fw_send_heartbeat(hdev); 7987 } 7988 7989 static int gaudi_cpucp_info_get(struct hl_device *hdev) 7990 { 7991 struct gaudi_device *gaudi = hdev->asic_specific; 7992 struct asic_fixed_properties *prop = &hdev->asic_prop; 7993 int rc; 7994 7995 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7996 return 0; 7997 7998 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 7999 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8000 mmCPU_BOOT_ERR1); 8001 if (rc) 8002 return rc; 8003 8004 if (!strlen(prop->cpucp_info.card_name)) 8005 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8006 CARD_NAME_MAX_LEN); 8007 8008 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8009 8010 set_default_power_values(hdev); 8011 8012 return 0; 8013 } 8014 8015 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8016 struct engines_data *e) 8017 { 8018 struct gaudi_device *gaudi = hdev->asic_specific; 8019 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8020 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8021 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8022 unsigned long *mask = (unsigned long *)mask_arr; 8023 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8024 bool is_idle = true, is_eng_idle, is_slave; 8025 u64 offset; 8026 
int i, dma_id, port; 8027 8028 if (e) 8029 hl_engine_data_sprintf(e, 8030 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8031 "--- ------- ------------ ---------- -------------\n"); 8032 8033 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8034 dma_id = gaudi_dma_assignment[i]; 8035 offset = dma_id * DMA_QMAN_OFFSET; 8036 8037 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8038 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8039 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8040 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8041 IS_DMA_IDLE(dma_core_sts0); 8042 is_idle &= is_eng_idle; 8043 8044 if (mask && !is_eng_idle) 8045 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8046 if (e) 8047 hl_engine_data_sprintf(e, fmt, dma_id, 8048 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8049 qm_cgm_sts, dma_core_sts0); 8050 } 8051 8052 if (e) 8053 hl_engine_data_sprintf(e, 8054 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8055 "--- ------- ------------ ---------- ----------\n"); 8056 8057 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8058 offset = i * TPC_QMAN_OFFSET; 8059 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8060 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8061 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8062 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8063 IS_TPC_IDLE(tpc_cfg_sts); 8064 is_idle &= is_eng_idle; 8065 8066 if (mask && !is_eng_idle) 8067 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8068 if (e) 8069 hl_engine_data_sprintf(e, fmt, i, 8070 is_eng_idle ? "Y" : "N", 8071 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8072 } 8073 8074 if (e) 8075 hl_engine_data_sprintf(e, 8076 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8077 "--- ------- ------------ ---------- -----------\n"); 8078 8079 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8080 offset = i * MME_QMAN_OFFSET; 8081 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8082 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8083 8084 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8085 is_slave = i % 2; 8086 if (!is_slave) { 8087 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8088 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8089 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8090 } 8091 8092 is_idle &= is_eng_idle; 8093 8094 if (mask && !is_eng_idle) 8095 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8096 if (e) { 8097 if (!is_slave) 8098 hl_engine_data_sprintf(e, fmt, i, 8099 is_eng_idle ? "Y" : "N", 8100 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8101 else 8102 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8103 is_eng_idle ? "Y" : "N", "-", 8104 "-", mme_arch_sts); 8105 } 8106 } 8107 8108 if (e) 8109 hl_engine_data_sprintf(e, 8110 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8111 "--- ------- ------------ ----------\n"); 8112 8113 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8114 offset = i * NIC_MACRO_QMAN_OFFSET; 8115 port = 2 * i; 8116 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8117 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8118 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8119 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8120 is_idle &= is_eng_idle; 8121 8122 if (mask && !is_eng_idle) 8123 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8124 if (e) 8125 hl_engine_data_sprintf(e, nic_fmt, port, 8126 is_eng_idle ? 
"Y" : "N", 8127 qm_glbl_sts0, qm_cgm_sts); 8128 } 8129 8130 port = 2 * i + 1; 8131 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8132 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8133 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8134 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8135 is_idle &= is_eng_idle; 8136 8137 if (mask && !is_eng_idle) 8138 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8139 if (e) 8140 hl_engine_data_sprintf(e, nic_fmt, port, 8141 is_eng_idle ? "Y" : "N", 8142 qm_glbl_sts0, qm_cgm_sts); 8143 } 8144 } 8145 8146 if (e) 8147 hl_engine_data_sprintf(e, "\n"); 8148 8149 return is_idle; 8150 } 8151 8152 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8153 __acquires(&gaudi->hw_queues_lock) 8154 { 8155 struct gaudi_device *gaudi = hdev->asic_specific; 8156 8157 spin_lock(&gaudi->hw_queues_lock); 8158 } 8159 8160 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8161 __releases(&gaudi->hw_queues_lock) 8162 { 8163 struct gaudi_device *gaudi = hdev->asic_specific; 8164 8165 spin_unlock(&gaudi->hw_queues_lock); 8166 } 8167 8168 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8169 { 8170 return hdev->pdev->device; 8171 } 8172 8173 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8174 size_t max_size) 8175 { 8176 struct gaudi_device *gaudi = hdev->asic_specific; 8177 8178 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8179 return 0; 8180 8181 return hl_fw_get_eeprom_data(hdev, data, max_size); 8182 } 8183 8184 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8185 { 8186 struct gaudi_device *gaudi = hdev->asic_specific; 8187 8188 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8189 return 0; 8190 8191 return hl_fw_get_monitor_dump(hdev, data); 8192 } 8193 8194 /* 8195 * this function should be used only during initialization and/or after reset, 8196 * when there are no active users. 
8197 */ 8198 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8199 { 8200 u64 kernel_timeout; 8201 u32 status, offset; 8202 int rc; 8203 8204 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8205 8206 if (hdev->pldm) 8207 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8208 else 8209 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8210 8211 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8212 lower_32_bits(tpc_kernel)); 8213 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8214 upper_32_bits(tpc_kernel)); 8215 8216 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8217 lower_32_bits(tpc_kernel)); 8218 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8219 upper_32_bits(tpc_kernel)); 8220 /* set a valid LUT pointer, content is of no significance */ 8221 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8222 lower_32_bits(tpc_kernel)); 8223 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8224 upper_32_bits(tpc_kernel)); 8225 8226 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8227 lower_32_bits(CFG_BASE + 8228 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8229 8230 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8231 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8232 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8233 /* wait a bit for the engine to start executing */ 8234 usleep_range(1000, 1500); 8235 8236 /* wait until engine has finished executing */ 8237 rc = hl_poll_timeout( 8238 hdev, 8239 mmTPC0_CFG_STATUS + offset, 8240 status, 8241 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8242 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8243 1000, 8244 kernel_timeout); 8245 8246 if (rc) { 8247 dev_err(hdev->dev, 8248 "Timeout while waiting for TPC%d icache prefetch\n", 8249 tpc_id); 8250 return -EIO; 8251 } 8252 8253 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8254 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8255 8256 /* wait a bit for the engine to start executing */ 8257 usleep_range(1000, 1500); 8258 8259 /* wait until engine has finished executing */ 8260 rc = hl_poll_timeout( 8261 hdev, 8262 mmTPC0_CFG_STATUS + offset, 8263 status, 8264 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8265 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8266 1000, 8267 kernel_timeout); 8268 8269 if (rc) { 8270 dev_err(hdev->dev, 8271 "Timeout while waiting for TPC%d vector pipe\n", 8272 tpc_id); 8273 return -EIO; 8274 } 8275 8276 rc = hl_poll_timeout( 8277 hdev, 8278 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8279 status, 8280 (status == 0), 8281 1000, 8282 kernel_timeout); 8283 8284 if (rc) { 8285 dev_err(hdev->dev, 8286 "Timeout while waiting for TPC%d kernel to execute\n", 8287 tpc_id); 8288 return -EIO; 8289 } 8290 8291 return 0; 8292 } 8293 8294 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8295 struct hl_ctx *ctx) 8296 { 8297 struct gaudi_device *gaudi = hdev->asic_specific; 8298 int min_alloc_order, rc, collective_cb_size; 8299 8300 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8301 return 0; 8302 8303 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8304 HOST_SPACE_INTERNAL_CB_SZ, 8305 &hdev->internal_cb_pool_dma_addr, 8306 GFP_KERNEL | __GFP_ZERO); 8307 8308 if (!hdev->internal_cb_pool_virt_addr) 8309 return -ENOMEM; 8310 8311 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8312 sizeof(struct packet_fence); 8313 min_alloc_order = ilog2(collective_cb_size); 8314 8315 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8316 if (!hdev->internal_cb_pool) { 8317 dev_err(hdev->dev, 8318 "Failed to 
create internal CB pool\n"); 8319 rc = -ENOMEM; 8320 goto free_internal_cb_pool; 8321 } 8322 8323 rc = gen_pool_add(hdev->internal_cb_pool, 8324 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8325 HOST_SPACE_INTERNAL_CB_SZ, -1); 8326 if (rc) { 8327 dev_err(hdev->dev, 8328 "Failed to add memory to internal CB pool\n"); 8329 rc = -EFAULT; 8330 goto destroy_internal_cb_pool; 8331 } 8332 8333 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8334 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8335 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8336 8337 if (!hdev->internal_cb_va_base) { 8338 rc = -ENOMEM; 8339 goto destroy_internal_cb_pool; 8340 } 8341 8342 mutex_lock(&hdev->mmu_lock); 8343 8344 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8345 hdev->internal_cb_pool_dma_addr, 8346 HOST_SPACE_INTERNAL_CB_SZ); 8347 if (rc) 8348 goto unreserve_internal_cb_pool; 8349 8350 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8351 if (rc) 8352 goto unmap_internal_cb_pool; 8353 8354 mutex_unlock(&hdev->mmu_lock); 8355 8356 return 0; 8357 8358 unmap_internal_cb_pool: 8359 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8360 HOST_SPACE_INTERNAL_CB_SZ); 8361 unreserve_internal_cb_pool: 8362 mutex_unlock(&hdev->mmu_lock); 8363 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8364 HOST_SPACE_INTERNAL_CB_SZ); 8365 destroy_internal_cb_pool: 8366 gen_pool_destroy(hdev->internal_cb_pool); 8367 free_internal_cb_pool: 8368 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8369 hdev->internal_cb_pool_dma_addr); 8370 8371 return rc; 8372 } 8373 8374 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8375 struct hl_ctx *ctx) 8376 { 8377 struct gaudi_device *gaudi = hdev->asic_specific; 8378 8379 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8380 return; 8381 8382 mutex_lock(&hdev->mmu_lock); 8383 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8384 HOST_SPACE_INTERNAL_CB_SZ); 8385 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8386 HOST_SPACE_INTERNAL_CB_SZ); 8387 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8388 mutex_unlock(&hdev->mmu_lock); 8389 8390 gen_pool_destroy(hdev->internal_cb_pool); 8391 8392 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8393 hdev->internal_cb_pool_dma_addr); 8394 } 8395 8396 static int gaudi_ctx_init(struct hl_ctx *ctx) 8397 { 8398 int rc; 8399 8400 if (ctx->asid == HL_KERNEL_ASID_ID) 8401 return 0; 8402 8403 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8404 if (rc) 8405 return rc; 8406 8407 rc = gaudi_restore_user_registers(ctx->hdev); 8408 if (rc) 8409 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8410 8411 return rc; 8412 } 8413 8414 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8415 { 8416 if (ctx->asid == HL_KERNEL_ASID_ID) 8417 return; 8418 8419 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8420 } 8421 8422 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8423 { 8424 return 0; 8425 } 8426 8427 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8428 { 8429 return gaudi_cq_assignment[cq_idx]; 8430 } 8431 8432 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8433 { 8434 return sizeof(struct packet_msg_short) + 8435 sizeof(struct packet_msg_prot) * 2; 8436 } 8437 8438 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8439 { 8440 return sizeof(struct packet_msg_short) * 4 + 8441 sizeof(struct packet_fence) + 8442 sizeof(struct packet_msg_prot) * 2; 8443 } 8444 8445 
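/*
 * Sync objects (SOBs) are 32-bit registers laid out 4 bytes apart in the
 * SYNC_MNGR_W_S objects block, so the address of SOB <sob_id> is simply
 * SOB_OBJ_0 + sob_id * 4. The same arithmetic is used when generating
 * signal CBs (gaudi_gen_signal_cb()) and when resetting a SOB
 * (gaudi_reset_sob()).
 */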
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8446 { 8447 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8448 } 8449 8450 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8451 u32 size, bool eb) 8452 { 8453 struct hl_cb *cb = (struct hl_cb *) data; 8454 struct packet_msg_short *pkt; 8455 u32 value, ctl, pkt_size = sizeof(*pkt); 8456 8457 pkt = cb->kernel_address + size; 8458 memset(pkt, 0, pkt_size); 8459 8460 /* Inc by 1, Mode ADD */ 8461 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8462 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8463 8464 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8465 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8466 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8467 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8468 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8469 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8470 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8471 8472 pkt->value = cpu_to_le32(value); 8473 pkt->ctl = cpu_to_le32(ctl); 8474 8475 return size + pkt_size; 8476 } 8477 8478 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8479 u16 addr) 8480 { 8481 u32 ctl, pkt_size = sizeof(*pkt); 8482 8483 memset(pkt, 0, pkt_size); 8484 8485 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8486 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8487 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8488 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8489 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8490 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8491 8492 pkt->value = cpu_to_le32(value); 8493 pkt->ctl = cpu_to_le32(ctl); 8494 8495 return pkt_size; 8496 } 8497 8498 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8499 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8500 u16 sob_val, u16 mon_id) 8501 { 8502 u64 monitor_base; 8503 u32 ctl, value, pkt_size = sizeof(*pkt); 8504 u16 msg_addr_offset; 8505 u8 mask; 8506 8507 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8508 dev_err(hdev->dev, 8509 "sob_base %u (mask %#x) is not valid\n", 8510 sob_base, sob_mask); 8511 return 0; 8512 } 8513 8514 /* 8515 * monitor_base should be the content of the base0 address registers, 8516 * so it will be added to the msg short offsets 8517 */ 8518 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8519 8520 msg_addr_offset = 8521 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8522 monitor_base; 8523 8524 memset(pkt, 0, pkt_size); 8525 8526 /* Monitor config packet: bind the monitor to a sync object */ 8527 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8528 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8529 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8530 0); /* GREATER OR EQUAL*/ 8531 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8532 8533 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8534 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8535 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8536 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8537 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8538 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8539 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8540 8541 pkt->value = 
cpu_to_le32(value); 8542 pkt->ctl = cpu_to_le32(ctl); 8543 8544 return pkt_size; 8545 } 8546 8547 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8548 { 8549 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8550 8551 memset(pkt, 0, pkt_size); 8552 8553 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8554 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8555 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8556 8557 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8558 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8559 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8560 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8561 8562 pkt->cfg = cpu_to_le32(cfg); 8563 pkt->ctl = cpu_to_le32(ctl); 8564 8565 return pkt_size; 8566 } 8567 8568 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8569 { 8570 u32 offset, nic_index; 8571 8572 switch (queue_id) { 8573 case GAUDI_QUEUE_ID_DMA_0_0: 8574 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8575 break; 8576 case GAUDI_QUEUE_ID_DMA_0_1: 8577 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8578 break; 8579 case GAUDI_QUEUE_ID_DMA_0_2: 8580 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8581 break; 8582 case GAUDI_QUEUE_ID_DMA_0_3: 8583 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8584 break; 8585 case GAUDI_QUEUE_ID_DMA_1_0: 8586 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8587 break; 8588 case GAUDI_QUEUE_ID_DMA_1_1: 8589 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8590 break; 8591 case GAUDI_QUEUE_ID_DMA_1_2: 8592 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8593 break; 8594 case GAUDI_QUEUE_ID_DMA_1_3: 8595 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8596 break; 8597 case GAUDI_QUEUE_ID_DMA_5_0: 8598 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8599 break; 8600 case GAUDI_QUEUE_ID_DMA_5_1: 8601 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8602 break; 8603 case GAUDI_QUEUE_ID_DMA_5_2: 8604 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8605 break; 8606 case GAUDI_QUEUE_ID_DMA_5_3: 8607 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8608 break; 8609 case GAUDI_QUEUE_ID_TPC_7_0: 8610 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8611 break; 8612 case GAUDI_QUEUE_ID_TPC_7_1: 8613 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8614 break; 8615 case GAUDI_QUEUE_ID_TPC_7_2: 8616 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8617 break; 8618 case GAUDI_QUEUE_ID_TPC_7_3: 8619 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8620 break; 8621 case GAUDI_QUEUE_ID_NIC_0_0: 8622 case GAUDI_QUEUE_ID_NIC_1_0: 8623 case GAUDI_QUEUE_ID_NIC_2_0: 8624 case GAUDI_QUEUE_ID_NIC_3_0: 8625 case GAUDI_QUEUE_ID_NIC_4_0: 8626 case GAUDI_QUEUE_ID_NIC_5_0: 8627 case GAUDI_QUEUE_ID_NIC_6_0: 8628 case GAUDI_QUEUE_ID_NIC_7_0: 8629 case GAUDI_QUEUE_ID_NIC_8_0: 8630 case GAUDI_QUEUE_ID_NIC_9_0: 8631 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8632 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8633 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8634 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8635 break; 8636 case GAUDI_QUEUE_ID_NIC_0_1: 8637 case GAUDI_QUEUE_ID_NIC_1_1: 8638 case GAUDI_QUEUE_ID_NIC_2_1: 8639 case GAUDI_QUEUE_ID_NIC_3_1: 8640 case GAUDI_QUEUE_ID_NIC_4_1: 8641 case GAUDI_QUEUE_ID_NIC_5_1: 8642 case GAUDI_QUEUE_ID_NIC_6_1: 8643 case GAUDI_QUEUE_ID_NIC_7_1: 8644 case GAUDI_QUEUE_ID_NIC_8_1: 8645 case GAUDI_QUEUE_ID_NIC_9_1: 8646 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8647 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8648 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8649 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8650 break; 8651 case GAUDI_QUEUE_ID_NIC_0_2: 8652 case GAUDI_QUEUE_ID_NIC_1_2: 8653 case GAUDI_QUEUE_ID_NIC_2_2: 8654 case 
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
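/*
 * Summary of the wait CB assembled by gaudi_gen_wait_cb() below: three
 * MSG_SHORT packets configure the monitor payload (low and high halves of the
 * fence counter address, then a payload value of 1), a fourth MSG_SHORT arms
 * the monitor on the requested SOB group, and a final FENCE packet (fence
 * ID 2, matching the CP_FENCE2 registers resolved by gaudi_get_fence_addr())
 * stalls the stream until the monitor's payload write brings that fence
 * counter to its target value.
 */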
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
						hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* The register value is a partial address of the sync object and is
	 * used as a unique identifier, so the cutoff CFG base bits must be
	 * cleared from the value first.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
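/*
 * The map built below is a hash table keyed by that CFG-relative sync object
 * address, so state-dump code can map a monitored sync object back to the
 * TPC, MME or DMA engine that signals it.
 */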
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}
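/*
 * Example of the SOB-id derivation above (illustrative values): with gid = 2
 * and an ARM mask of 0b11110000, bits 0-3 are cleared, so sync objects 16,
 * 17, 18 and 19 (gid * MONITOR_MAX_SOBS + i, with 8 SOBs per group as noted
 * in the comment above) are reported as being monitored.
 */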
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}


static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}


static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
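/*
 * ASIC-independent habanalabs code reaches the Gaudi implementation through
 * the dispatch table below, e.g. hdev->asic_funcs->gen_signal_cb(hdev, cb,
 * sob_id, size, eb); gaudi_set_asic_funcs() at the end of this file installs
 * the table on the device structure.
 */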
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}