// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is always not
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL		0x72E91AB9

#define GAUDI_MAX_STRING_LEN		20

#define GAUDI_CB_POOL_CB_CNT		512
#define GAUDI_CB_POOL_CB_SIZE		0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT		0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256
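
/* External PCI DMA queues that serve as stream masters */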
static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
"FENCE 3 inc over max value and clipped", 221 "FENCE 0 dec under min value and clipped", 222 "FENCE 1 dec under min value and clipped", 223 "FENCE 2 dec under min value and clipped", 224 "FENCE 3 dec under min value and clipped" 225 }; 226 227 static const char * const 228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 229 "Choice push while full error", 230 "Choice Q watchdog error", 231 "MSG AXI LBW returned with error" 232 }; 233 234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 243 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 296 QUEUE_TYPE_INT, /* 
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */
	QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
};

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
"SYNC_OBJ_ENGINE_SEM_TPC_6" }, 368 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, 369 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, 370 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, 371 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 372 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 373 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 374 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 375 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 376 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 377 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 378 }; 379 380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 381 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 382 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 383 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 384 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 385 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 386 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 387 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 388 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 389 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 390 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 391 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 392 }; 393 394 static s64 gaudi_state_dump_specs_props[] = { 395 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 396 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 397 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 398 [SP_MON_OBJ_WR_ADDR_LOW] = 399 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 400 [SP_MON_OBJ_WR_ADDR_HIGH] = 401 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 402 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 403 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 404 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 405 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 406 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 407 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 408 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 409 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 410 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 411 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 412 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 413 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 414 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 415 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 416 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 417 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 418 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 419 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 420 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 421 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 422 [SP_FENCE0_CNT_OFFSET] = 423 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 424 [SP_FENCE0_RDATA_OFFSET] = 425 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 426 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 427 [SP_NUM_CORES] = 1, 428 }; 429 430 static const int gaudi_queue_id_to_engine_id[] = { 431 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, 432 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, 433 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, 434 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, 435 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, 436 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, 437 
	[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7,
	[GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0,
	[GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2,
	[GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0,
	[GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1,
	[GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2,
	[GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3,
	[GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4,
	[GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5,
	[GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6,
	[GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7,
	[GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0,
	[GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1,
	[GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2,
	[GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3,
	[GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4,
	[GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5,
	[GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6,
	[GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7,
	[GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8,
	[GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9,
};

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);
static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = prop->mmu_hop_table_size;
	prop->pmmu.hop0_tables_total_size = prop->mmu_hop0_tables_total_size;

	/* PMMU and HPMMU are the same except for page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
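	/* The DMMU covers the upper half of the host VA space and uses 2MB pages */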
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so in this stage
		 * decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);
	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
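
	/*
	 * The master queue waits on the slave SOBs using two monitors: the
	 * first covers the first HL_MAX_SOBS_PER_MONITOR SOBs in the group
	 * and the second covers the rest.
	 */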
	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before start going over the jobs
		 * of the master/slaves, the sob_value will be taken later on
		 * in gaudi_collective_slave_init_job, depending on each
		 * job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value,
	 * changed by the signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
						gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() out of two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}

	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues, which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
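	 * There is one slave job per NIC engine plus one for the reduction
	 * engine.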
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
					BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc) {
		dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
		return rc;
	}

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
The device CPU works with 40-bit addresses, and bit 39 must be set 1705 * to '1' when accessing the host. 1706 * Bits 49:39 of the full host address are saved for a later 1707 * HW configuration that extends the address to 50 bits. 1708 * Because there is a single HW register that holds the extension bits, 1709 * these bits must be identical across the entire allocated range. 1710 */ 1711 1712 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1713 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1714 &dma_addr_arr[i], 1715 GFP_KERNEL | __GFP_ZERO); 1716 if (!virt_addr_arr[i]) { 1717 rc = -ENOMEM; 1718 goto free_dma_mem_arr; 1719 } 1720 1721 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1722 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1723 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1724 break; 1725 } 1726 1727 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1728 dev_err(hdev->dev, 1729 "MSBs of CPU accessible DMA memory are not identical across the allocated range\n"); 1730 rc = -EFAULT; 1731 goto free_dma_mem_arr; 1732 } 1733 1734 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1735 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1736 hdev->cpu_pci_msb_addr = 1737 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1738 1739 if (!hdev->asic_prop.fw_security_enabled) 1740 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1741 1742 free_dma_mem_arr: 1743 for (j = 0 ; j < i ; j++) 1744 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1745 dma_addr_arr[j]); 1746 1747 return rc; 1748 } 1749 1750 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1751 { 1752 struct gaudi_device *gaudi = hdev->asic_specific; 1753 struct gaudi_internal_qman_info *q; 1754 u32 i; 1755 1756 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1757 q = &gaudi->internal_qmans[i]; 1758 if (!q->pq_kernel_addr) 1759 continue; 1760 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1761 } 1762 } 1763 1764 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1765 { 1766 struct gaudi_device *gaudi = hdev->asic_specific; 1767 struct gaudi_internal_qman_info *q; 1768 int rc, i; 1769 1770 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1771 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1772 continue; 1773 1774 q = &gaudi->internal_qmans[i]; 1775 1776 switch (i) { 1777 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1778 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1779 break; 1780 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1781 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1782 break; 1783 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1784 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1785 break; 1786 case GAUDI_QUEUE_ID_NIC_0_0 ...
GAUDI_QUEUE_ID_NIC_9_3: 1787 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1788 break; 1789 default: 1790 dev_err(hdev->dev, "Bad internal queue index %d", i); 1791 rc = -EINVAL; 1792 goto free_internal_qmans_pq_mem; 1793 } 1794 1795 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1796 GFP_KERNEL | __GFP_ZERO); 1797 if (!q->pq_kernel_addr) { 1798 rc = -ENOMEM; 1799 goto free_internal_qmans_pq_mem; 1800 } 1801 } 1802 1803 return 0; 1804 1805 free_internal_qmans_pq_mem: 1806 gaudi_free_internal_qmans_pq_mem(hdev); 1807 return rc; 1808 } 1809 1810 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1811 { 1812 struct asic_fixed_properties *prop = &hdev->asic_prop; 1813 struct pci_mem_region *region; 1814 1815 /* CFG */ 1816 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1817 region->region_base = CFG_BASE; 1818 region->region_size = CFG_SIZE; 1819 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1820 region->bar_size = CFG_BAR_SIZE; 1821 region->bar_id = CFG_BAR_ID; 1822 region->used = 1; 1823 1824 /* SRAM */ 1825 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1826 region->region_base = SRAM_BASE_ADDR; 1827 region->region_size = SRAM_SIZE; 1828 region->offset_in_bar = 0; 1829 region->bar_size = SRAM_BAR_SIZE; 1830 region->bar_id = SRAM_BAR_ID; 1831 region->used = 1; 1832 1833 /* DRAM */ 1834 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1835 region->region_base = DRAM_PHYS_BASE; 1836 region->region_size = hdev->asic_prop.dram_size; 1837 region->offset_in_bar = 0; 1838 region->bar_size = prop->dram_pci_bar_size; 1839 region->bar_id = HBM_BAR_ID; 1840 region->used = 1; 1841 1842 /* SP SRAM */ 1843 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1844 region->region_base = PSOC_SCRATCHPAD_ADDR; 1845 region->region_size = PSOC_SCRATCHPAD_SIZE; 1846 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1847 region->bar_size = CFG_BAR_SIZE; 1848 region->bar_id = CFG_BAR_ID; 1849 region->used = 1; 1850 } 1851 1852 static int gaudi_sw_init(struct hl_device *hdev) 1853 { 1854 struct gaudi_device *gaudi; 1855 u32 i, event_id = 0; 1856 int rc; 1857 1858 /* Allocate device structure */ 1859 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1860 if (!gaudi) 1861 return -ENOMEM; 1862 1863 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1864 if (gaudi_irq_map_table[i].valid) { 1865 if (event_id == GAUDI_EVENT_SIZE) { 1866 dev_err(hdev->dev, 1867 "Event array exceeds the limit of %u events\n", 1868 GAUDI_EVENT_SIZE); 1869 rc = -EINVAL; 1870 goto free_gaudi_device; 1871 } 1872 1873 gaudi->events[event_id++] = 1874 gaudi_irq_map_table[i].fc_id; 1875 } 1876 } 1877 1878 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1879 1880 hdev->asic_specific = gaudi; 1881 1882 /* Create DMA pool for small allocations */ 1883 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1884 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1885 if (!hdev->dma_pool) { 1886 dev_err(hdev->dev, "failed to create DMA pool\n"); 1887 rc = -ENOMEM; 1888 goto free_gaudi_device; 1889 } 1890 1891 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1892 if (rc) 1893 goto free_dma_pool; 1894 1895 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1896 if (!hdev->cpu_accessible_dma_pool) { 1897 dev_err(hdev->dev, 1898 "Failed to create CPU accessible DMA pool\n"); 1899 rc = -ENOMEM; 1900 goto free_cpu_dma_mem; 1901 } 1902 1903 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1904 (uintptr_t) hdev->cpu_accessible_dma_mem, 1905 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1906 if 
(rc) { 1907 dev_err(hdev->dev, 1908 "Failed to add memory to CPU accessible DMA pool\n"); 1909 rc = -EFAULT; 1910 goto free_cpu_accessible_dma_pool; 1911 } 1912 1913 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1914 if (rc) 1915 goto free_cpu_accessible_dma_pool; 1916 1917 spin_lock_init(&gaudi->hw_queues_lock); 1918 1919 hdev->supports_sync_stream = true; 1920 hdev->supports_coresight = true; 1921 hdev->supports_staged_submission = true; 1922 hdev->supports_wait_for_multi_cs = true; 1923 1924 hdev->asic_funcs->set_pci_memory_regions(hdev); 1925 hdev->stream_master_qid_arr = 1926 hdev->asic_funcs->get_stream_master_qid_arr(); 1927 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1928 1929 return 0; 1930 1931 free_cpu_accessible_dma_pool: 1932 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1933 free_cpu_dma_mem: 1934 if (!hdev->asic_prop.fw_security_enabled) 1935 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1936 hdev->cpu_pci_msb_addr); 1937 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1938 hdev->cpu_accessible_dma_address); 1939 free_dma_pool: 1940 dma_pool_destroy(hdev->dma_pool); 1941 free_gaudi_device: 1942 kfree(gaudi); 1943 return rc; 1944 } 1945 1946 static int gaudi_sw_fini(struct hl_device *hdev) 1947 { 1948 struct gaudi_device *gaudi = hdev->asic_specific; 1949 1950 gaudi_free_internal_qmans_pq_mem(hdev); 1951 1952 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1953 1954 if (!hdev->asic_prop.fw_security_enabled) 1955 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1956 hdev->cpu_pci_msb_addr); 1957 1958 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1959 hdev->cpu_accessible_dma_address); 1960 1961 dma_pool_destroy(hdev->dma_pool); 1962 1963 kfree(gaudi); 1964 1965 return 0; 1966 } 1967 1968 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1969 { 1970 struct hl_device *hdev = arg; 1971 int i; 1972 1973 if (hdev->disabled) 1974 return IRQ_HANDLED; 1975 1976 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1977 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1978 1979 hl_irq_handler_eq(irq, &hdev->event_queue); 1980 1981 return IRQ_HANDLED; 1982 } 1983 1984 /* 1985 * For backward compatibility, new MSI interrupts should be set after the 1986 * existing CPU and NIC interrupts. 1987 */ 1988 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1989 bool cpu_eq) 1990 { 1991 int msi_vec; 1992 1993 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1994 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1995 GAUDI_EVENT_QUEUE_MSI_IDX); 1996 1997 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1998 (nr + NIC_NUMBER_OF_ENGINES + 1); 1999 2000 return pci_irq_vector(hdev->pdev, msi_vec); 2001 } 2002 2003 static int gaudi_enable_msi_single(struct hl_device *hdev) 2004 { 2005 int rc, irq; 2006 2007 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2008 2009 irq = gaudi_pci_irq_vector(hdev, 0, false); 2010 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2011 "gaudi single msi", hdev); 2012 if (rc) 2013 dev_err(hdev->dev, 2014 "Failed to request single MSI IRQ\n"); 2015 2016 return rc; 2017 } 2018 2019 static int gaudi_enable_msi(struct hl_device *hdev) 2020 { 2021 struct gaudi_device *gaudi = hdev->asic_specific; 2022 int rc; 2023 2024 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2025 return 0; 2026 2027 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2028 if (rc < 0) { 2029 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2030 return rc; 2031 } 2032 2033 rc = gaudi_enable_msi_single(hdev); 2034 if (rc) 2035 goto free_pci_irq_vectors; 2036 2037 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2038 2039 return 0; 2040 2041 free_pci_irq_vectors: 2042 pci_free_irq_vectors(hdev->pdev); 2043 return rc; 2044 } 2045 2046 static void gaudi_sync_irqs(struct hl_device *hdev) 2047 { 2048 struct gaudi_device *gaudi = hdev->asic_specific; 2049 2050 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2051 return; 2052 2053 /* Wait for all pending IRQs to be finished */ 2054 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2055 } 2056 2057 static void gaudi_disable_msi(struct hl_device *hdev) 2058 { 2059 struct gaudi_device *gaudi = hdev->asic_specific; 2060 2061 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2062 return; 2063 2064 gaudi_sync_irqs(hdev); 2065 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2066 pci_free_irq_vectors(hdev->pdev); 2067 2068 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2069 } 2070 2071 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2072 { 2073 struct gaudi_device *gaudi = hdev->asic_specific; 2074 2075 if (hdev->asic_prop.fw_security_enabled) 2076 return; 2077 2078 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2079 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2080 return; 2081 2082 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2083 return; 2084 2085 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2086 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2087 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2088 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2089 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2090 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2091 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2092 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2093 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2094 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2095 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2096 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2097 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2098 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2099 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2100 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2101 2102 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2103 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2104 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2105 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2106 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2107 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2108 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2109 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2110 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2111 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2112 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2113 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2114 
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2115 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2116 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2117 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2118 2119 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2120 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2121 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2122 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2123 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2124 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2125 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2126 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2127 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2128 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2129 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2130 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2131 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2132 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2133 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2134 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2135 2136 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2137 } 2138 2139 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2140 { 2141 struct gaudi_device *gaudi = hdev->asic_specific; 2142 2143 if (hdev->asic_prop.fw_security_enabled) 2144 return; 2145 2146 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2147 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2148 return; 2149 2150 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2151 return; 2152 2153 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2154 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2155 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2156 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2157 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2158 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2159 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2160 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2161 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2162 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2163 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2164 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2165 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2166 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2167 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2169 2170 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2171 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2172 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2173 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2174 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2175 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2176 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2177 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2178 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2179 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2180 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2181 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2182 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2183 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2184 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2185 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2186 2187 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2188 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2189 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2190 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2191 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2192 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2193 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2194 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2195 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2196 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2197 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2198 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2199 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2200 1 << 
DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2201 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2202 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2203 2204 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2205 } 2206 2207 static void gaudi_init_e2e(struct hl_device *hdev) 2208 { 2209 if (hdev->asic_prop.fw_security_enabled) 2210 return; 2211 2212 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2213 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2214 return; 2215 2216 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2217 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2218 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2219 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2220 2221 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2222 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2223 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2224 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2225 2226 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2227 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2228 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2229 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2230 2231 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2232 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2233 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2234 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2235 2236 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2237 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2238 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2239 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2240 2241 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2242 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2243 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2244 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2245 2246 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2247 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2248 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2249 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2250 2251 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2252 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2253 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2254 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2255 2256 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2257 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2258 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2259 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2260 2261 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2262 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2263 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2264 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2265 2266 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2267 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2268 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2269 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2270 2271 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2272 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2273 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2274 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2275 2276 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2277 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2278 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2279 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2280 2281 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2282 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2283 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2284 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2285 2286 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2287 
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2288 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2289 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2290 2291 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2292 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2293 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2294 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2295 2296 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2297 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2298 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2299 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2300 2301 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2302 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2303 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2304 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2305 2306 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2307 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2308 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2309 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2310 2311 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2312 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2313 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2314 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2315 2316 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2317 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2318 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2319 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2320 2321 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2322 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2323 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2324 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2325 2326 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2327 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2328 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2329 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2330 2331 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2332 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2333 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2334 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2335 2336 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2337 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2338 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2339 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2340 2341 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2342 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2343 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2344 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2345 2346 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2347 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2348 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2349 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2350 2351 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2352 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2353 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2354 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2355 2356 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2357 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2358 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2359 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2360 2361 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2362 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2363 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2364 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2365 2366 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2367 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2368 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2369 1 << 
IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2370 2371 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2372 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2373 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2374 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2375 2376 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2377 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2378 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2379 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2380 2381 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2382 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2383 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2384 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2385 2386 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2387 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2388 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2389 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2390 2391 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2392 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2393 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2394 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2395 2396 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2397 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2398 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2399 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2400 2401 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2402 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2403 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2404 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2405 2406 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2407 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2408 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2409 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2410 2411 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2412 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2413 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2414 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2415 2416 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2417 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2418 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2419 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2420 2421 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2422 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2423 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2424 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2425 2426 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2427 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2428 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2429 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2430 2431 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2432 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2433 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2434 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2435 2436 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2437 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2438 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2439 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2440 2441 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2442 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2443 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2444 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2445 2446 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2447 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2448 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2449 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2450 2451 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2452 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2453 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2454 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2455 } 2456 2457 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2458 { 2459 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2460 2461 if (hdev->asic_prop.fw_security_enabled) 2462 return; 2463 2464 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2465 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2466 return; 2467 2468 hbm0_wr = 0x33333333; 2469 hbm0_rd = 0x77777777; 2470 
hbm1_wr = 0x55555555; 2471 hbm1_rd = 0xDDDDDDDD; 2472 2473 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2474 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2475 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2476 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2477 2478 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2479 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2480 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2481 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2482 2483 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2484 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2485 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2486 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2487 2488 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2489 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2490 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2491 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2492 2493 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2494 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2495 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2496 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2497 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2498 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2499 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2500 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2501 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2502 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2503 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2504 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2505 2506 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2509 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2510 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2511 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2512 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2513 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2514 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2515 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2516 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2517 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2518 } 2519 2520 static void gaudi_init_golden_registers(struct hl_device *hdev) 2521 { 2522 u32 tpc_offset; 2523 int tpc_id, i; 2524 2525 gaudi_init_e2e(hdev); 2526 gaudi_init_hbm_cred(hdev); 2527 2528 for (tpc_id = 0, tpc_offset = 0; 2529 tpc_id < TPC_NUMBER_OF_ENGINES; 2530 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2531 /* Mask all arithmetic interrupts from TPC */ 2532 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2533 /* Set 16 cache lines */ 2534 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2535 ICACHE_FETCH_LINE_NUM, 2); 2536 } 2537 2538 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2539 for (i = 0 ; i < 128 ; i += 8) 2540 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2541 2542 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2543 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2544 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2545 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2546 } 2547 2548 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2549 int qman_id, dma_addr_t qman_pq_addr) 2550 { 2551 struct cpu_dyn_regs *dyn_regs = 2552 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2553 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2554 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2555 u32 q_off, dma_qm_offset; 2556 u32 dma_qm_err_cfg, irq_handler_offset; 2557 2558 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2559 2560 mtr_base_en_lo = 
lower_32_bits(CFG_BASE + 2561 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2562 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2563 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2564 so_base_en_lo = lower_32_bits(CFG_BASE + 2565 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2566 so_base_en_hi = upper_32_bits(CFG_BASE + 2567 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2568 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2569 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2570 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2571 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2572 so_base_ws_lo = lower_32_bits(CFG_BASE + 2573 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2574 so_base_ws_hi = upper_32_bits(CFG_BASE + 2575 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2576 2577 q_off = dma_qm_offset + qman_id * 4; 2578 2579 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2580 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2581 2582 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2583 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2584 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2585 2586 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2587 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2588 QMAN_LDMA_SRC_OFFSET); 2589 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2590 QMAN_LDMA_DST_OFFSET); 2591 2592 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2593 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2594 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2595 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2596 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2597 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2598 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2599 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2600 2601 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2602 2603 /* The following configuration is needed only once per QMAN */ 2604 if (qman_id == 0) { 2605 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2606 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2607 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2608 2609 /* Configure RAZWI IRQ */ 2610 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2611 if (hdev->stop_on_err) 2612 dma_qm_err_cfg |= 2613 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2614 2615 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2616 2617 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2618 lower_32_bits(CFG_BASE + irq_handler_offset)); 2619 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2620 upper_32_bits(CFG_BASE + irq_handler_offset)); 2621 2622 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2623 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2624 dma_id); 2625 2626 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2627 QM_ARB_ERR_MSG_EN_MASK); 2628 2629 /* Set timeout to maximum */ 2630 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2631 2632 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2633 QMAN_EXTERNAL_MAKE_TRUSTED); 2634 2635 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2636 } 2637 } 2638 2639 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2640 { 2641 struct cpu_dyn_regs *dyn_regs = 2642 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2643 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2644 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2645 u32 irq_handler_offset; 2646 2647 /* Set to maximum possible according to physical size */ 2648 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2649 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2650 2651 /* WA for H/W bug H3-2116 */ 2652 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2653 2654 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2655 if (hdev->stop_on_err) 2656 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2657 2658 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2659 2660 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2661 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2662 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2663 2664 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2665 lower_32_bits(CFG_BASE + irq_handler_offset)); 2666 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2667 upper_32_bits(CFG_BASE + irq_handler_offset)); 2668 2669 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2670 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2671 WREG32(mmDMA0_CORE_PROT + dma_offset, 2672 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2673 /* If the channel is secured, it should be in MMU bypass mode */ 2674 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2675 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2676 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2677 } 2678 2679 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2680 u32 enable_mask) 2681 { 2682 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2683 2684 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2685 } 2686 2687 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2688 { 2689 struct gaudi_device *gaudi = hdev->asic_specific; 2690 struct hl_hw_queue *q; 2691 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2692 2693 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2694 return; 2695 2696 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2697 dma_id = gaudi_dma_assignment[i]; 2698 /* 2699 * For queues after the CPU Q need to add 1 to get the correct 2700 * queue. 
In addition, need to add the CPU EQ and NIC IRQs in 2701 * order to get the correct MSI register. 2702 */ 2703 if (dma_id > 1) { 2704 cpu_skip = 1; 2705 nic_skip = NIC_NUMBER_OF_ENGINES; 2706 } else { 2707 cpu_skip = 0; 2708 nic_skip = 0; 2709 } 2710 2711 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2712 q_idx = 4 * dma_id + j + cpu_skip; 2713 q = &hdev->kernel_queues[q_idx]; 2714 q->cq_id = cq_id++; 2715 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2716 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2717 q->bus_address); 2718 } 2719 2720 gaudi_init_dma_core(hdev, dma_id); 2721 2722 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2723 } 2724 2725 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2726 } 2727 2728 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2729 int qman_id, u64 qman_base_addr) 2730 { 2731 struct cpu_dyn_regs *dyn_regs = 2732 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2733 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2734 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2735 u32 dma_qm_err_cfg, irq_handler_offset; 2736 u32 q_off, dma_qm_offset; 2737 2738 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2739 2740 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2741 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2742 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2743 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2744 so_base_en_lo = lower_32_bits(CFG_BASE + 2745 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2746 so_base_en_hi = upper_32_bits(CFG_BASE + 2747 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2748 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2749 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2750 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2751 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2752 so_base_ws_lo = lower_32_bits(CFG_BASE + 2753 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2754 so_base_ws_hi = upper_32_bits(CFG_BASE + 2755 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2756 2757 q_off = dma_qm_offset + qman_id * 4; 2758 2759 if (qman_id < 4) { 2760 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2761 lower_32_bits(qman_base_addr)); 2762 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2763 upper_32_bits(qman_base_addr)); 2764 2765 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2766 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2767 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2768 2769 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2770 QMAN_CPDMA_SIZE_OFFSET); 2771 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2772 QMAN_CPDMA_SRC_OFFSET); 2773 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2774 QMAN_CPDMA_DST_OFFSET); 2775 } else { 2776 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2777 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2778 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2779 2780 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2781 QMAN_LDMA_SIZE_OFFSET); 2782 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2783 QMAN_LDMA_SRC_OFFSET); 2784 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2785 QMAN_LDMA_DST_OFFSET); 2786 2787 /* Configure RAZWI IRQ */ 2788 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2789 if (hdev->stop_on_err) 2790 dma_qm_err_cfg |= 2791 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2792 2793 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2794 2795 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2796 lower_32_bits(CFG_BASE + irq_handler_offset)); 2797 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2798 upper_32_bits(CFG_BASE + irq_handler_offset)); 2799 2800 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2801 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2802 dma_id); 2803 2804 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2805 QM_ARB_ERR_MSG_EN_MASK); 2806 2807 /* Set timeout to maximum */ 2808 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2809 2810 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2811 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2812 QMAN_INTERNAL_MAKE_TRUSTED); 2813 } 2814 2815 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2816 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2817 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2818 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2819 2820 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2821 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2822 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2823 mtr_base_ws_lo); 2824 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2825 mtr_base_ws_hi); 2826 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2827 so_base_ws_lo); 2828 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2829 so_base_ws_hi); 2830 } 2831 } 2832 2833 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2834 { 2835 struct gaudi_device *gaudi = hdev->asic_specific; 2836 struct gaudi_internal_qman_info *q; 2837 u64 qman_base_addr; 2838 int i, j, dma_id, internal_q_index; 2839 2840 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2841 return; 2842 2843 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2844 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2845 2846 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2847 /* 2848 * Add the CPU queue in order to get the correct queue 2849 * number as all internal queue are placed after it 2850 */ 2851 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2852 2853 q = &gaudi->internal_qmans[internal_q_index]; 2854 qman_base_addr = (u64) q->pq_dma_addr; 2855 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 2856 qman_base_addr); 2857 } 2858 2859 /* Initializing lower CP for HBM DMA QMAN */ 2860 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2861 2862 gaudi_init_dma_core(hdev, dma_id); 2863 2864 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2865 } 2866 2867 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2868 } 2869 2870 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2871 int qman_id, u64 qman_base_addr) 2872 { 2873 struct cpu_dyn_regs *dyn_regs = 2874 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2875 u32 mtr_base_lo, mtr_base_hi; 2876 u32 so_base_lo, so_base_hi; 2877 u32 irq_handler_offset; 2878 u32 q_off, mme_id; 2879 
u32 mme_qm_err_cfg; 2880 2881 mtr_base_lo = lower_32_bits(CFG_BASE + 2882 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2883 mtr_base_hi = upper_32_bits(CFG_BASE + 2884 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2885 so_base_lo = lower_32_bits(CFG_BASE + 2886 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2887 so_base_hi = upper_32_bits(CFG_BASE + 2888 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2889 2890 q_off = mme_offset + qman_id * 4; 2891 2892 if (qman_id < 4) { 2893 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2894 lower_32_bits(qman_base_addr)); 2895 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2896 upper_32_bits(qman_base_addr)); 2897 2898 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2899 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2900 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2901 2902 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2903 QMAN_CPDMA_SIZE_OFFSET); 2904 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2905 QMAN_CPDMA_SRC_OFFSET); 2906 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2907 QMAN_CPDMA_DST_OFFSET); 2908 } else { 2909 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2910 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2911 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2912 2913 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2914 QMAN_LDMA_SIZE_OFFSET); 2915 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2916 QMAN_LDMA_SRC_OFFSET); 2917 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2918 QMAN_LDMA_DST_OFFSET); 2919 2920 /* Configure RAZWI IRQ */ 2921 mme_id = mme_offset / 2922 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2923 2924 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2925 if (hdev->stop_on_err) 2926 mme_qm_err_cfg |= 2927 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2928 2929 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2930 2931 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2932 lower_32_bits(CFG_BASE + irq_handler_offset)); 2933 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2934 upper_32_bits(CFG_BASE + irq_handler_offset)); 2935 2936 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2937 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2938 mme_id); 2939 2940 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2941 QM_ARB_ERR_MSG_EN_MASK); 2942 2943 /* Set timeout to maximum */ 2944 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 2945 2946 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 2947 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 2948 QMAN_INTERNAL_MAKE_TRUSTED); 2949 } 2950 2951 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2952 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2953 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2954 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2955 } 2956 2957 static void gaudi_init_mme_qmans(struct hl_device *hdev) 2958 { 2959 struct gaudi_device *gaudi = hdev->asic_specific; 2960 struct gaudi_internal_qman_info *q; 2961 u64 qman_base_addr; 2962 u32 mme_offset; 2963 int i, internal_q_index; 2964 2965 if (gaudi->hw_cap_initialized & HW_CAP_MME) 2966 return; 2967 2968 /* 2969 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 2970 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 2971 */ 2972 2973 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2974 2975 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 2976 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 2977 q = &gaudi->internal_qmans[internal_q_index]; 2978 qman_base_addr = (u64) q->pq_dma_addr; 
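/*
 * The first four streams are programmed on the QMAN at mme_offset
 * (the north-west MME, mmMME2_QM); after the fourth stream, mme_offset
 * is reset to 0 below so the remaining streams are programmed on the
 * south-west MME (mmMME0_QM).
 */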
2979 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 2980 qman_base_addr); 2981 if (i == 3) 2982 mme_offset = 0; 2983 } 2984 2985 /* Initializing lower CP for MME QMANs */ 2986 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2987 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 2988 gaudi_init_mme_qman(hdev, 0, 4, 0); 2989 2990 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2991 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2992 2993 gaudi->hw_cap_initialized |= HW_CAP_MME; 2994 } 2995 2996 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 2997 int qman_id, u64 qman_base_addr) 2998 { 2999 struct cpu_dyn_regs *dyn_regs = 3000 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3001 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3002 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3003 u32 tpc_qm_err_cfg, irq_handler_offset; 3004 u32 q_off, tpc_id; 3005 3006 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3007 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3008 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3009 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3010 so_base_en_lo = lower_32_bits(CFG_BASE + 3011 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3012 so_base_en_hi = upper_32_bits(CFG_BASE + 3013 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3014 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3015 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3016 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3017 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3018 so_base_ws_lo = lower_32_bits(CFG_BASE + 3019 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3020 so_base_ws_hi = upper_32_bits(CFG_BASE + 3021 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3022 3023 q_off = tpc_offset + qman_id * 4; 3024 3025 tpc_id = tpc_offset / 3026 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3027 3028 if (qman_id < 4) { 3029 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3030 lower_32_bits(qman_base_addr)); 3031 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3032 upper_32_bits(qman_base_addr)); 3033 3034 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3035 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3036 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3037 3038 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3039 QMAN_CPDMA_SIZE_OFFSET); 3040 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3041 QMAN_CPDMA_SRC_OFFSET); 3042 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3043 QMAN_CPDMA_DST_OFFSET); 3044 } else { 3045 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3046 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3047 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3048 3049 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3050 QMAN_LDMA_SIZE_OFFSET); 3051 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3052 QMAN_LDMA_SRC_OFFSET); 3053 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3054 QMAN_LDMA_DST_OFFSET); 3055 3056 /* Configure RAZWI IRQ */ 3057 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3058 if (hdev->stop_on_err) 3059 tpc_qm_err_cfg |= 3060 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3061 3062 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3063 3064 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3065 lower_32_bits(CFG_BASE + irq_handler_offset)); 3066 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3067 upper_32_bits(CFG_BASE + irq_handler_offset)); 3068 3069 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3070 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3071 tpc_id); 3072 3073 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3074 QM_ARB_ERR_MSG_EN_MASK); 3075 3076 /* Set timeout to maximum */ 3077 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3078 3079 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3080 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3081 QMAN_INTERNAL_MAKE_TRUSTED); 3082 } 3083 3084 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3085 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3086 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3087 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3088 3089 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3090 if (tpc_id == 6) { 3091 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3092 mtr_base_ws_lo); 3093 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3094 mtr_base_ws_hi); 3095 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3096 so_base_ws_lo); 3097 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3098 so_base_ws_hi); 3099 } 3100 } 3101 3102 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3103 { 3104 struct gaudi_device *gaudi = hdev->asic_specific; 3105 struct gaudi_internal_qman_info *q; 3106 u64 qman_base_addr; 3107 u32 so_base_hi, tpc_offset = 0; 3108 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3109 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3110 int i, tpc_id, internal_q_index; 3111 3112 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3113 return; 3114 3115 so_base_hi = upper_32_bits(CFG_BASE + 3116 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3117 3118 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3119 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3120 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3121 tpc_id * QMAN_STREAMS + i; 3122 q = &gaudi->internal_qmans[internal_q_index]; 3123 qman_base_addr = (u64) q->pq_dma_addr; 3124 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3125 qman_base_addr); 3126 3127 if (i == 3) { 3128 /* Initializing lower CP for TPC QMAN */ 3129 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3130 3131 /* Enable the QMAN and TPC channel */ 3132 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3133 QMAN_TPC_ENABLE); 3134 } 3135 } 3136 3137 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3138 so_base_hi); 3139 3140 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3141 3142 gaudi->hw_cap_initialized |= 3143 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3144 } 3145 } 3146 3147 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3148 int qman_id, u64 qman_base_addr, int nic_id) 3149 { 3150 
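/*
 * Set up a single NIC QMAN stream: PQ base address and size, CP LDMA
 * offsets and the monitor/SOB message base addresses. RAZWI error
 * reporting and the arbitration watchdog are configured only once per
 * QMAN, for qman_id 0 (see the block at the end of this function).
 */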
struct cpu_dyn_regs *dyn_regs = 3151 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3152 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3153 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3154 u32 nic_qm_err_cfg, irq_handler_offset; 3155 u32 q_off; 3156 3157 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3158 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3159 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3160 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3161 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3162 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3163 so_base_en_hi = upper_32_bits(CFG_BASE + 3164 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3165 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3166 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3167 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3168 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3169 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3170 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3171 so_base_ws_hi = upper_32_bits(CFG_BASE + 3172 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3173 3174 q_off = nic_offset + qman_id * 4; 3175 3176 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3177 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3178 3179 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3180 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3181 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3182 3183 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3184 QMAN_LDMA_SIZE_OFFSET); 3185 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3186 QMAN_LDMA_SRC_OFFSET); 3187 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3188 QMAN_LDMA_DST_OFFSET); 3189 3190 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3191 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3192 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3193 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3194 3195 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3196 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3197 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3198 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3199 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3200 3201 if (qman_id == 0) { 3202 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3203 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3204 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3205 3206 /* Configure RAZWI IRQ */ 3207 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3208 if (hdev->stop_on_err) 3209 nic_qm_err_cfg |= 3210 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3211 3212 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3213 3214 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3215 lower_32_bits(CFG_BASE + irq_handler_offset)); 3216 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3217 upper_32_bits(CFG_BASE + irq_handler_offset)); 3218 3219 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3220 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3221 nic_id); 3222 3223 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3224 QM_ARB_ERR_MSG_EN_MASK); 3225 3226 /* Set timeout to maximum */ 3227 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3228 3229 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3230 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3231 QMAN_INTERNAL_MAKE_TRUSTED); 3232 } 3233 } 3234 3235 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3236 { 3237 struct gaudi_device *gaudi = hdev->asic_specific; 3238 struct gaudi_internal_qman_info *q; 3239 u64 qman_base_addr; 3240 u32 nic_offset = 0; 3241 u32 nic_delta_between_qmans = 3242 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3243 u32 nic_delta_between_nics = 3244 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3245 int i, nic_id, internal_q_index; 3246 3247 if (!hdev->nic_ports_mask) 3248 return; 3249 3250 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3251 return; 3252 3253 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3254 3255 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3256 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3257 nic_offset += nic_delta_between_qmans; 3258 if (nic_id & 1) { 3259 nic_offset -= (nic_delta_between_qmans * 2); 3260 nic_offset += nic_delta_between_nics; 3261 } 3262 continue; 3263 } 3264 3265 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3266 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3267 nic_id * QMAN_STREAMS + i; 3268 q = &gaudi->internal_qmans[internal_q_index]; 3269 qman_base_addr = (u64) q->pq_dma_addr; 3270 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3271 qman_base_addr, nic_id); 3272 } 3273 3274 /* Enable the QMAN */ 3275 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3276 3277 nic_offset += nic_delta_between_qmans; 3278 if (nic_id & 1) { 3279 nic_offset -= (nic_delta_between_qmans * 2); 3280 nic_offset += nic_delta_between_nics; 3281 } 3282 3283 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3284 } 3285 } 3286 3287 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3288 { 3289 struct gaudi_device *gaudi = hdev->asic_specific; 3290 3291 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3292 return; 3293 3294 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3295 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3296 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3297 } 3298 3299 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3300 { 3301 struct gaudi_device *gaudi = hdev->asic_specific; 3302 3303 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3304 return; 3305 3306 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3307 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3308 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3309 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3310 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3311 } 3312 3313 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3314 { 3315 struct gaudi_device *gaudi = hdev->asic_specific; 3316 3317 if 
(!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3318 return; 3319 3320 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3321 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3322 } 3323 3324 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3325 { 3326 struct gaudi_device *gaudi = hdev->asic_specific; 3327 u32 tpc_offset = 0; 3328 int tpc_id; 3329 3330 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3331 return; 3332 3333 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3334 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3335 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3336 } 3337 } 3338 3339 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3340 { 3341 struct gaudi_device *gaudi = hdev->asic_specific; 3342 u32 nic_mask, nic_offset = 0; 3343 u32 nic_delta_between_qmans = 3344 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3345 u32 nic_delta_between_nics = 3346 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3347 int nic_id; 3348 3349 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3350 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3351 3352 if (gaudi->hw_cap_initialized & nic_mask) 3353 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3354 3355 nic_offset += nic_delta_between_qmans; 3356 if (nic_id & 1) { 3357 nic_offset -= (nic_delta_between_qmans * 2); 3358 nic_offset += nic_delta_between_nics; 3359 } 3360 } 3361 } 3362 3363 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3364 { 3365 struct gaudi_device *gaudi = hdev->asic_specific; 3366 3367 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3368 return; 3369 3370 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3371 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3372 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3373 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3374 } 3375 3376 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3377 { 3378 struct gaudi_device *gaudi = hdev->asic_specific; 3379 3380 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3381 return; 3382 3383 /* Stop CPs of HBM DMA QMANs */ 3384 3385 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3386 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3387 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3388 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3389 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3390 } 3391 3392 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3393 { 3394 struct gaudi_device *gaudi = hdev->asic_specific; 3395 3396 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3397 return; 3398 3399 /* Stop CPs of MME QMANs */ 3400 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3401 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3402 } 3403 3404 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3405 { 3406 struct gaudi_device *gaudi = hdev->asic_specific; 3407 3408 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3409 return; 3410 3411 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3412 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3413 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3414 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3415 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3416 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3417 
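/* (0x1F presumably asserts CP_STOP for all five CPs of each QMAN: the four stream CPs plus the lower CP, whereas the PCI DMA flow above uses 0xF for the upper CPs only) */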
WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3418 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3419 } 3420 3421 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3422 { 3423 struct gaudi_device *gaudi = hdev->asic_specific; 3424 3425 /* Stop upper CPs of QMANs */ 3426 3427 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3428 WREG32(mmNIC0_QM0_GLBL_CFG1, 3429 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3430 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3431 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3432 3433 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3434 WREG32(mmNIC0_QM1_GLBL_CFG1, 3435 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3436 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3437 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3438 3439 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3440 WREG32(mmNIC1_QM0_GLBL_CFG1, 3441 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3442 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3443 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3444 3445 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3446 WREG32(mmNIC1_QM1_GLBL_CFG1, 3447 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3448 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3449 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3450 3451 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3452 WREG32(mmNIC2_QM0_GLBL_CFG1, 3453 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3454 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3455 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3456 3457 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3458 WREG32(mmNIC2_QM1_GLBL_CFG1, 3459 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3460 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3461 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3462 3463 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3464 WREG32(mmNIC3_QM0_GLBL_CFG1, 3465 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3466 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3467 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3468 3469 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3470 WREG32(mmNIC3_QM1_GLBL_CFG1, 3471 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3472 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3473 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3474 3475 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3476 WREG32(mmNIC4_QM0_GLBL_CFG1, 3477 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3478 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3479 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3480 3481 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3482 WREG32(mmNIC4_QM1_GLBL_CFG1, 3483 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3484 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3485 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3486 } 3487 3488 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3489 { 3490 struct gaudi_device *gaudi = hdev->asic_specific; 3491 3492 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3493 return; 3494 3495 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3496 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3497 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3498 } 3499 3500 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3501 { 3502 struct gaudi_device *gaudi = hdev->asic_specific; 3503 3504 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3505 return; 3506 3507 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3508 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3509 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3510 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3511 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3512 } 3513 3514 static void gaudi_mme_stall(struct hl_device *hdev) 3515 { 3516 struct gaudi_device *gaudi = hdev->asic_specific; 3517 3518 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3519 return; 3520 
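/* Stall all four MME engines (MME0-MME3) by setting the stall bit in their ACC and SBAB blocks. This runs after the MME QMANs were stopped and before they are disabled, as part of the halt-engines flow. */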
3521 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3522 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3523 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3524 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3525 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3526 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3527 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3528 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3529 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3530 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3531 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3532 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3533 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3534 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3535 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3536 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3537 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3538 } 3539 3540 static void gaudi_tpc_stall(struct hl_device *hdev) 3541 { 3542 struct gaudi_device *gaudi = hdev->asic_specific; 3543 3544 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3545 return; 3546 3547 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3548 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3549 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3550 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3551 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3552 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3553 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3554 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3555 } 3556 3557 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3558 { 3559 u32 qman_offset; 3560 int i; 3561 3562 if (hdev->asic_prop.fw_security_enabled) 3563 return; 3564 3565 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3566 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3567 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3568 3569 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3570 } 3571 3572 WREG32(mmMME0_QM_CGM_CFG, 0); 3573 WREG32(mmMME0_QM_CGM_CFG1, 0); 3574 WREG32(mmMME2_QM_CGM_CFG, 0); 3575 WREG32(mmMME2_QM_CGM_CFG1, 0); 3576 3577 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3578 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3579 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3580 3581 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3582 } 3583 } 3584 3585 static void gaudi_enable_timestamp(struct hl_device *hdev) 3586 { 3587 /* Disable the timestamp counter */ 3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3589 3590 /* Zero the lower/upper parts of the 64-bit counter */ 3591 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3592 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3593 3594 /* Enable the counter */ 3595 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3596 } 3597 3598 static void gaudi_disable_timestamp(struct hl_device *hdev) 3599 { 3600 /* Disable the timestamp counter */ 3601 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3602 } 3603 3604 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3605 { 3606 u32 wait_timeout_ms; 3607 3608 if (hdev->pldm) 3609 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3610 else 3611 
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3612 3613 if (fw_reset) 3614 goto skip_engines; 3615 3616 gaudi_stop_nic_qmans(hdev); 3617 gaudi_stop_mme_qmans(hdev); 3618 gaudi_stop_tpc_qmans(hdev); 3619 gaudi_stop_hbm_dma_qmans(hdev); 3620 gaudi_stop_pci_dma_qmans(hdev); 3621 3622 msleep(wait_timeout_ms); 3623 3624 gaudi_pci_dma_stall(hdev); 3625 gaudi_hbm_dma_stall(hdev); 3626 gaudi_tpc_stall(hdev); 3627 gaudi_mme_stall(hdev); 3628 3629 msleep(wait_timeout_ms); 3630 3631 gaudi_disable_nic_qmans(hdev); 3632 gaudi_disable_mme_qmans(hdev); 3633 gaudi_disable_tpc_qmans(hdev); 3634 gaudi_disable_hbm_dma_qmans(hdev); 3635 gaudi_disable_pci_dma_qmans(hdev); 3636 3637 gaudi_disable_timestamp(hdev); 3638 3639 skip_engines: 3640 gaudi_disable_msi(hdev); 3641 } 3642 3643 static int gaudi_mmu_init(struct hl_device *hdev) 3644 { 3645 struct asic_fixed_properties *prop = &hdev->asic_prop; 3646 struct gaudi_device *gaudi = hdev->asic_specific; 3647 u64 hop0_addr; 3648 int rc, i; 3649 3650 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3651 return 0; 3652 3653 for (i = 0 ; i < prop->max_asid ; i++) { 3654 hop0_addr = prop->mmu_pgt_addr + 3655 (i * prop->mmu_hop_table_size); 3656 3657 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3658 if (rc) { 3659 dev_err(hdev->dev, 3660 "failed to set hop0 addr for asid %d\n", i); 3661 return rc; 3662 } 3663 } 3664 3665 /* init MMU cache manage page */ 3666 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3667 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3668 3669 /* mem cache invalidation */ 3670 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3671 3672 rc = hl_mmu_invalidate_cache(hdev, true, 0); 3673 if (rc) 3674 return rc; 3675 3676 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3677 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3678 3679 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3680 3681 /* 3682 * The H/W expects the first PI after init to be 1. After wraparound 3683 * we'll write 0. 
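* The PI is expected to advance with every subsequent MMU cache-invalidation request.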
3684 */ 3685 gaudi->mmu_cache_inv_pi = 1; 3686 3687 gaudi->hw_cap_initialized |= HW_CAP_MMU; 3688 3689 return 0; 3690 } 3691 3692 static int gaudi_load_firmware_to_device(struct hl_device *hdev) 3693 { 3694 void __iomem *dst; 3695 3696 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 3697 3698 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0); 3699 } 3700 3701 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 3702 { 3703 void __iomem *dst; 3704 3705 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 3706 3707 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); 3708 } 3709 3710 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev) 3711 { 3712 struct dynamic_fw_load_mgr *dynamic_loader; 3713 struct cpu_dyn_regs *dyn_regs; 3714 3715 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3716 3717 /* 3718 * here we update initial values for few specific dynamic regs (as 3719 * before reading the first descriptor from FW those value has to be 3720 * hard-coded) in later stages of the protocol those values will be 3721 * updated automatically by reading the FW descriptor so data there 3722 * will always be up-to-date 3723 */ 3724 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3725 dyn_regs->kmd_msg_to_cpu = 3726 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3727 dyn_regs->cpu_cmd_status_to_host = 3728 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3729 3730 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC; 3731 } 3732 3733 static void gaudi_init_static_firmware_loader(struct hl_device *hdev) 3734 { 3735 struct static_fw_load_mgr *static_loader; 3736 3737 static_loader = &hdev->fw_loader.static_loader; 3738 3739 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3740 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3741 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3742 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3743 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3744 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3745 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3746 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3747 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3748 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3749 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3750 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3751 static_loader->cpu_reset_wait_msec = hdev->pldm ? 
GAUDI_PLDM_RESET_WAIT_MSEC : 3753 GAUDI_CPU_RESET_WAIT_MSEC; 3754 } 3755 3756 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3757 { 3758 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3759 3760 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3761 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3762 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3763 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3764 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3765 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3766 } 3767 3768 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3769 { 3770 struct asic_fixed_properties *prop = &hdev->asic_prop; 3771 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3772 3773 /* fill common fields */ 3774 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3775 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3776 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3777 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3778 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3779 fw_loader->skip_bmc = !hdev->bmc_enable; 3780 fw_loader->sram_bar_id = SRAM_BAR_ID; 3781 fw_loader->dram_bar_id = HBM_BAR_ID; 3782 3783 if (prop->dynamic_fw_load) 3784 gaudi_init_dynamic_firmware_loader(hdev); 3785 else 3786 gaudi_init_static_firmware_loader(hdev); 3787 } 3788 3789 static int gaudi_init_cpu(struct hl_device *hdev) 3790 { 3791 struct gaudi_device *gaudi = hdev->asic_specific; 3792 int rc; 3793 3794 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3795 return 0; 3796 3797 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3798 return 0; 3799 3800 /* 3801 * The device CPU works with 40-bit addresses. 3802 * This register sets the extension to 50 bits.
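* Presumably, cpu_pci_msb_addr carries the host address bits above bit 39 that the device CPU uses for its host accesses.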
3803 */ 3804 if (!hdev->asic_prop.fw_security_enabled) 3805 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3806 3807 rc = hl_fw_init_cpu(hdev); 3808 3809 if (rc) 3810 return rc; 3811 3812 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3813 3814 return 0; 3815 } 3816 3817 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3818 { 3819 struct cpu_dyn_regs *dyn_regs = 3820 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3821 struct asic_fixed_properties *prop = &hdev->asic_prop; 3822 struct gaudi_device *gaudi = hdev->asic_specific; 3823 u32 status, irq_handler_offset; 3824 struct hl_eq *eq; 3825 struct hl_hw_queue *cpu_pq = 3826 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3827 int err; 3828 3829 if (!hdev->cpu_queues_enable) 3830 return 0; 3831 3832 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3833 return 0; 3834 3835 eq = &hdev->event_queue; 3836 3837 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3838 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3839 3840 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3841 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3842 3843 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3844 lower_32_bits(hdev->cpu_accessible_dma_address)); 3845 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3846 upper_32_bits(hdev->cpu_accessible_dma_address)); 3847 3848 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3849 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3850 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3851 3852 /* Used for EQ CI */ 3853 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3854 3855 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3856 3857 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3858 3859 irq_handler_offset = prop->gic_interrupts_enable ? 3860 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3861 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3862 3863 WREG32(irq_handler_offset, 3864 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3865 3866 err = hl_poll_timeout( 3867 hdev, 3868 mmCPU_IF_QUEUE_INIT, 3869 status, 3870 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3871 1000, 3872 cpu_timeout); 3873 3874 if (err) { 3875 dev_err(hdev->dev, 3876 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3877 return -EIO; 3878 } 3879 3880 /* update FW application security bits */ 3881 if (prop->fw_cpu_boot_dev_sts0_valid) 3882 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3883 if (prop->fw_cpu_boot_dev_sts1_valid) 3884 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3885 3886 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3887 return 0; 3888 } 3889 3890 static void gaudi_pre_hw_init(struct hl_device *hdev) 3891 { 3892 /* Perform read from the device to make sure device is up */ 3893 RREG32(mmHW_STATE); 3894 3895 if (!hdev->asic_prop.fw_security_enabled) { 3896 /* Set the access through PCI bars (Linux driver only) as 3897 * secured 3898 */ 3899 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3900 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3901 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3902 3903 /* Perform read to flush the waiting writes to ensure 3904 * configuration was set in the device 3905 */ 3906 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3907 } 3908 3909 /* 3910 * Let's mark in the H/W that we have reached this point. We check 3911 * this value in the reset_before_init function to understand whether 3912 * we need to reset the chip before doing H/W init. 
This register is 3913 * cleared by the H/W upon H/W reset 3914 */ 3915 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3916 } 3917 3918 static int gaudi_hw_init(struct hl_device *hdev) 3919 { 3920 struct gaudi_device *gaudi = hdev->asic_specific; 3921 int rc; 3922 3923 gaudi_pre_hw_init(hdev); 3924 3925 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3926 * So we set it here and if anyone tries to move it later to 3927 * a different address, there will be an error 3928 */ 3929 if (hdev->asic_prop.iatu_done_by_fw) 3930 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3931 3932 /* 3933 * Before pushing u-boot/linux to device, need to set the hbm bar to 3934 * base address of dram 3935 */ 3936 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 3937 dev_err(hdev->dev, 3938 "failed to map HBM bar to DRAM base address\n"); 3939 return -EIO; 3940 } 3941 3942 rc = gaudi_init_cpu(hdev); 3943 if (rc) { 3944 dev_err(hdev->dev, "failed to initialize CPU\n"); 3945 return rc; 3946 } 3947 3948 /* In case the clock gating was enabled in preboot we need to disable 3949 * it here before touching the MME/TPC registers. 3950 */ 3951 gaudi_disable_clock_gating(hdev); 3952 3953 /* SRAM scrambler must be initialized after CPU is running from HBM */ 3954 gaudi_init_scrambler_sram(hdev); 3955 3956 /* This is here just in case we are working without CPU */ 3957 gaudi_init_scrambler_hbm(hdev); 3958 3959 gaudi_init_golden_registers(hdev); 3960 3961 rc = gaudi_mmu_init(hdev); 3962 if (rc) 3963 return rc; 3964 3965 gaudi_init_security(hdev); 3966 3967 gaudi_init_pci_dma_qmans(hdev); 3968 3969 gaudi_init_hbm_dma_qmans(hdev); 3970 3971 gaudi_init_mme_qmans(hdev); 3972 3973 gaudi_init_tpc_qmans(hdev); 3974 3975 gaudi_init_nic_qmans(hdev); 3976 3977 gaudi_enable_timestamp(hdev); 3978 3979 /* MSI must be enabled before CPU queues and NIC are initialized */ 3980 rc = gaudi_enable_msi(hdev); 3981 if (rc) 3982 goto disable_queues; 3983 3984 /* must be called after MSI was enabled */ 3985 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 3986 if (rc) { 3987 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 3988 rc); 3989 goto disable_msi; 3990 } 3991 3992 /* Perform read from the device to flush all configuration */ 3993 RREG32(mmHW_STATE); 3994 3995 return 0; 3996 3997 disable_msi: 3998 gaudi_disable_msi(hdev); 3999 disable_queues: 4000 gaudi_disable_mme_qmans(hdev); 4001 gaudi_disable_pci_dma_qmans(hdev); 4002 4003 return rc; 4004 } 4005 4006 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4007 { 4008 struct cpu_dyn_regs *dyn_regs = 4009 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4010 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4011 struct gaudi_device *gaudi = hdev->asic_specific; 4012 bool driver_performs_reset; 4013 4014 if (!hard_reset) { 4015 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4016 return 0; 4017 } 4018 4019 if (hdev->pldm) { 4020 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4021 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4022 } else { 4023 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4024 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4025 } 4026 4027 if (fw_reset) { 4028 dev_dbg(hdev->dev, 4029 "Firmware performs HARD reset, going to wait %dms\n", 4030 reset_timeout_ms); 4031 4032 goto skip_reset; 4033 } 4034 4035 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4036 !hdev->asic_prop.hard_reset_done_by_fw); 4037 4038 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4039 * halt mode 4040 */ 4041 if (driver_performs_reset) 4042 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4043 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4044 4045 /* If linux is loaded in the device CPU we need to communicate with it 4046 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4047 * registers in case of old F/Ws 4048 */ 4049 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4050 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4051 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4052 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4053 4054 WREG32(irq_handler_offset, 4055 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4056 4057 /* This is a hail-mary attempt to revive the card in the small chance that the 4058 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4059 * In that case, triggering reset through GIC won't help. We need to trigger the 4060 * reset as if Linux wasn't loaded. 4061 * 4062 * We do it only if the reset cause was HB, because that would be the indication 4063 * of such an event. 4064 * 4065 * In case watchdog hasn't expired but we still got HB, then this won't do any 4066 * damage. 4067 */ 4068 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4069 if (hdev->asic_prop.hard_reset_done_by_fw) 4070 hl_fw_ask_hard_reset_without_linux(hdev); 4071 else 4072 hl_fw_ask_halt_machine_without_linux(hdev); 4073 } 4074 } else { 4075 if (hdev->asic_prop.hard_reset_done_by_fw) 4076 hl_fw_ask_hard_reset_without_linux(hdev); 4077 else 4078 hl_fw_ask_halt_machine_without_linux(hdev); 4079 } 4080 4081 if (driver_performs_reset) { 4082 4083 /* Configure the reset registers. Must be done as early as 4084 * possible in case we fail during H/W initialization 4085 */ 4086 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4087 (CFG_RST_H_DMA_MASK | 4088 CFG_RST_H_MME_MASK | 4089 CFG_RST_H_SM_MASK | 4090 CFG_RST_H_TPC_7_MASK)); 4091 4092 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4093 4094 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4095 (CFG_RST_H_HBM_MASK | 4096 CFG_RST_H_TPC_7_MASK | 4097 CFG_RST_H_NIC_MASK | 4098 CFG_RST_H_SM_MASK | 4099 CFG_RST_H_DMA_MASK | 4100 CFG_RST_H_MME_MASK | 4101 CFG_RST_H_CPU_MASK | 4102 CFG_RST_H_MMU_MASK)); 4103 4104 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4105 (CFG_RST_L_IF_MASK | 4106 CFG_RST_L_PSOC_MASK | 4107 CFG_RST_L_TPC_MASK)); 4108 4109 msleep(cpu_timeout_ms); 4110 4111 /* Tell ASIC not to re-initialize PCIe */ 4112 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4113 4114 /* Restart BTL/BLR upon hard-reset */ 4115 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4116 4117 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4118 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4119 4120 dev_dbg(hdev->dev, 4121 "Issued HARD reset command, going to wait %dms\n", 4122 reset_timeout_ms); 4123 } else { 4124 dev_dbg(hdev->dev, 4125 "Firmware performs HARD reset, going to wait %dms\n", 4126 reset_timeout_ms); 4127 } 4128 4129 skip_reset: 4130 /* 4131 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4132 * itself is in reset. 
Need to wait until the reset is deasserted 4133 */ 4134 msleep(reset_timeout_ms); 4135 4136 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4137 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { 4138 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); 4139 return -ETIMEDOUT; 4140 } 4141 4142 if (gaudi) { 4143 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4144 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4145 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4146 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4147 HW_CAP_HBM_SCRAMBLER); 4148 4149 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4150 4151 hdev->device_cpu_is_halted = false; 4152 } 4153 return 0; 4154 } 4155 4156 static int gaudi_suspend(struct hl_device *hdev) 4157 { 4158 int rc; 4159 4160 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4161 if (rc) 4162 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); 4163 4164 return rc; 4165 } 4166 4167 static int gaudi_resume(struct hl_device *hdev) 4168 { 4169 return gaudi_init_iatu(hdev); 4170 } 4171 4172 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4173 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4174 { 4175 int rc; 4176 4177 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4178 VM_DONTCOPY | VM_NORESERVE); 4179 4180 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4181 (dma_addr - HOST_PHYS_BASE), size); 4182 if (rc) 4183 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4184 4185 return rc; 4186 } 4187 4188 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4189 { 4190 struct cpu_dyn_regs *dyn_regs = 4191 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4192 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4193 struct gaudi_device *gaudi = hdev->asic_specific; 4194 bool invalid_queue = false; 4195 int dma_id; 4196 4197 switch (hw_queue_id) { 4198 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4199 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4200 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4201 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4202 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4203 break; 4204 4205 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4206 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4207 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4208 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4209 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4210 break; 4211 4212 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4213 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4214 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4215 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4216 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4217 break; 4218 4219 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4220 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4221 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4222 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4223 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4224 break; 4225 4226 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4227 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4228 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4229 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4230 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4231 break; 4232 4233 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4234 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4235 dma_qm_offset = dma_id * 
DMA_QMAN_OFFSET; 4236 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4237 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4238 break; 4239 4240 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4241 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4242 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4243 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4244 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4245 break; 4246 4247 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4248 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4249 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4250 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4251 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4252 break; 4253 4254 case GAUDI_QUEUE_ID_CPU_PQ: 4255 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4256 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4257 else 4258 invalid_queue = true; 4259 break; 4260 4261 case GAUDI_QUEUE_ID_MME_0_0: 4262 db_reg_offset = mmMME2_QM_PQ_PI_0; 4263 break; 4264 4265 case GAUDI_QUEUE_ID_MME_0_1: 4266 db_reg_offset = mmMME2_QM_PQ_PI_1; 4267 break; 4268 4269 case GAUDI_QUEUE_ID_MME_0_2: 4270 db_reg_offset = mmMME2_QM_PQ_PI_2; 4271 break; 4272 4273 case GAUDI_QUEUE_ID_MME_0_3: 4274 db_reg_offset = mmMME2_QM_PQ_PI_3; 4275 break; 4276 4277 case GAUDI_QUEUE_ID_MME_1_0: 4278 db_reg_offset = mmMME0_QM_PQ_PI_0; 4279 break; 4280 4281 case GAUDI_QUEUE_ID_MME_1_1: 4282 db_reg_offset = mmMME0_QM_PQ_PI_1; 4283 break; 4284 4285 case GAUDI_QUEUE_ID_MME_1_2: 4286 db_reg_offset = mmMME0_QM_PQ_PI_2; 4287 break; 4288 4289 case GAUDI_QUEUE_ID_MME_1_3: 4290 db_reg_offset = mmMME0_QM_PQ_PI_3; 4291 break; 4292 4293 case GAUDI_QUEUE_ID_TPC_0_0: 4294 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4295 break; 4296 4297 case GAUDI_QUEUE_ID_TPC_0_1: 4298 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4299 break; 4300 4301 case GAUDI_QUEUE_ID_TPC_0_2: 4302 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4303 break; 4304 4305 case GAUDI_QUEUE_ID_TPC_0_3: 4306 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4307 break; 4308 4309 case GAUDI_QUEUE_ID_TPC_1_0: 4310 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4311 break; 4312 4313 case GAUDI_QUEUE_ID_TPC_1_1: 4314 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4315 break; 4316 4317 case GAUDI_QUEUE_ID_TPC_1_2: 4318 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4319 break; 4320 4321 case GAUDI_QUEUE_ID_TPC_1_3: 4322 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4323 break; 4324 4325 case GAUDI_QUEUE_ID_TPC_2_0: 4326 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4327 break; 4328 4329 case GAUDI_QUEUE_ID_TPC_2_1: 4330 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4331 break; 4332 4333 case GAUDI_QUEUE_ID_TPC_2_2: 4334 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4335 break; 4336 4337 case GAUDI_QUEUE_ID_TPC_2_3: 4338 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4339 break; 4340 4341 case GAUDI_QUEUE_ID_TPC_3_0: 4342 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4343 break; 4344 4345 case GAUDI_QUEUE_ID_TPC_3_1: 4346 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4347 break; 4348 4349 case GAUDI_QUEUE_ID_TPC_3_2: 4350 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4351 break; 4352 4353 case GAUDI_QUEUE_ID_TPC_3_3: 4354 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4355 break; 4356 4357 case GAUDI_QUEUE_ID_TPC_4_0: 4358 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4359 break; 4360 4361 case GAUDI_QUEUE_ID_TPC_4_1: 4362 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4363 break; 4364 4365 case GAUDI_QUEUE_ID_TPC_4_2: 4366 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4367 break; 4368 4369 case GAUDI_QUEUE_ID_TPC_4_3: 4370 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4371 break; 4372 4373 case GAUDI_QUEUE_ID_TPC_5_0: 4374 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4375 break; 4376 4377 case 
GAUDI_QUEUE_ID_TPC_5_1: 4378 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4379 break; 4380 4381 case GAUDI_QUEUE_ID_TPC_5_2: 4382 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4383 break; 4384 4385 case GAUDI_QUEUE_ID_TPC_5_3: 4386 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4387 break; 4388 4389 case GAUDI_QUEUE_ID_TPC_6_0: 4390 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4391 break; 4392 4393 case GAUDI_QUEUE_ID_TPC_6_1: 4394 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4395 break; 4396 4397 case GAUDI_QUEUE_ID_TPC_6_2: 4398 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4399 break; 4400 4401 case GAUDI_QUEUE_ID_TPC_6_3: 4402 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4403 break; 4404 4405 case GAUDI_QUEUE_ID_TPC_7_0: 4406 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4407 break; 4408 4409 case GAUDI_QUEUE_ID_TPC_7_1: 4410 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4411 break; 4412 4413 case GAUDI_QUEUE_ID_TPC_7_2: 4414 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4415 break; 4416 4417 case GAUDI_QUEUE_ID_TPC_7_3: 4418 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4419 break; 4420 4421 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4422 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4423 invalid_queue = true; 4424 4425 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4426 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4427 break; 4428 4429 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4430 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4431 invalid_queue = true; 4432 4433 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4434 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4435 break; 4436 4437 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4438 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4439 invalid_queue = true; 4440 4441 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4442 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4443 break; 4444 4445 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4446 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4447 invalid_queue = true; 4448 4449 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4450 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4451 break; 4452 4453 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4454 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4455 invalid_queue = true; 4456 4457 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4458 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4459 break; 4460 4461 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4462 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4463 invalid_queue = true; 4464 4465 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4466 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4467 break; 4468 4469 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4470 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4471 invalid_queue = true; 4472 4473 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4474 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4475 break; 4476 4477 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4478 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4479 invalid_queue = true; 4480 4481 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4482 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4483 break; 4484 4485 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4486 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4487 invalid_queue = true; 4488 4489 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4490 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4491 break; 4492 4493 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4494 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4495 invalid_queue = true; 4496 4497 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4498 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4499 break; 4500 4501 default: 4502 
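/* Unknown queue ID - handled below together with CPU/NIC queues whose capability bit was never set */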
invalid_queue = true; 4503 } 4504 4505 if (invalid_queue) { 4506 /* Should never get here */ 4507 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4508 hw_queue_id); 4509 return; 4510 } 4511 4512 db_value = pi; 4513 4514 /* ring the doorbell */ 4515 WREG32(db_reg_offset, db_value); 4516 4517 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4518 /* make sure device CPU will read latest data from host */ 4519 mb(); 4520 4521 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4522 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4523 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4524 4525 WREG32(irq_handler_offset, 4526 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4527 } 4528 } 4529 4530 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4531 struct hl_bd *bd) 4532 { 4533 __le64 *pbd = (__le64 *) bd; 4534 4535 /* The QMANs are on the host memory so a simple copy suffice */ 4536 pqe[0] = pbd[0]; 4537 pqe[1] = pbd[1]; 4538 } 4539 4540 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4541 dma_addr_t *dma_handle, gfp_t flags) 4542 { 4543 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4544 dma_handle, flags); 4545 4546 /* Shift to the device's base physical address of host memory */ 4547 if (kernel_addr) 4548 *dma_handle += HOST_PHYS_BASE; 4549 4550 return kernel_addr; 4551 } 4552 4553 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4554 void *cpu_addr, dma_addr_t dma_handle) 4555 { 4556 /* Cancel the device's base physical address of host memory */ 4557 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4558 4559 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4560 } 4561 4562 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4563 { 4564 struct asic_fixed_properties *prop = &hdev->asic_prop; 4565 u64 cur_addr = prop->dram_user_base_address; 4566 u32 chunk_size, busy; 4567 int rc, dma_id; 4568 4569 while (cur_addr < prop->dram_end_address) { 4570 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4571 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4572 4573 chunk_size = 4574 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4575 4576 dev_dbg(hdev->dev, 4577 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4578 cur_addr, cur_addr + chunk_size); 4579 4580 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4581 lower_32_bits(val)); 4582 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4583 upper_32_bits(val)); 4584 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4585 lower_32_bits(cur_addr)); 4586 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4587 upper_32_bits(cur_addr)); 4588 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4589 chunk_size); 4590 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4591 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4592 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4593 4594 cur_addr += chunk_size; 4595 4596 if (cur_addr == prop->dram_end_address) 4597 break; 4598 } 4599 4600 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4601 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4602 4603 rc = hl_poll_timeout( 4604 hdev, 4605 mmDMA0_CORE_STS0 + dma_offset, 4606 busy, 4607 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4608 1000, 4609 HBM_SCRUBBING_TIMEOUT_US); 4610 4611 if (rc) { 4612 dev_err(hdev->dev, 4613 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4614 dma_id); 4615 return -EIO; 4616 } 4617 } 4618 } 4619 4620 return 0; 4621 } 4622 4623 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4624 { 4625 struct asic_fixed_properties *prop = 
&hdev->asic_prop; 4626 u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US; 4627 u64 addr, size, val = hdev->memory_scrub_val; 4628 ktime_t timeout; 4629 int rc = 0; 4630 4631 if (!hdev->memory_scrub) 4632 return 0; 4633 4634 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4635 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4636 if (ktime_compare(ktime_get(), timeout) > 0) { 4637 dev_err(hdev->dev, "waiting for idle timeout\n"); 4638 return -ETIMEDOUT; 4639 } 4640 usleep_range((1000 >> 2) + 1, 1000); 4641 } 4642 4643 /* Scrub SRAM */ 4644 addr = prop->sram_user_base_address; 4645 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4646 4647 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4648 addr, addr + size, val); 4649 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4650 if (rc) { 4651 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4652 return rc; 4653 } 4654 4655 /* Scrub HBM using all DMA channels in parallel */ 4656 rc = gaudi_scrub_device_dram(hdev, val); 4657 if (rc) { 4658 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4659 return rc; 4660 } 4661 4662 return 0; 4663 } 4664 4665 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4666 u32 queue_id, dma_addr_t *dma_handle, 4667 u16 *queue_len) 4668 { 4669 struct gaudi_device *gaudi = hdev->asic_specific; 4670 struct gaudi_internal_qman_info *q; 4671 4672 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4673 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4674 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4675 return NULL; 4676 } 4677 4678 q = &gaudi->internal_qmans[queue_id]; 4679 *dma_handle = q->pq_dma_addr; 4680 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4681 4682 return q->pq_kernel_addr; 4683 } 4684 4685 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4686 u16 len, u32 timeout, u64 *result) 4687 { 4688 struct gaudi_device *gaudi = hdev->asic_specific; 4689 4690 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4691 if (result) 4692 *result = 0; 4693 return 0; 4694 } 4695 4696 if (!timeout) 4697 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4698 4699 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4700 timeout, result); 4701 } 4702 4703 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4704 { 4705 struct packet_msg_prot *fence_pkt; 4706 dma_addr_t pkt_dma_addr; 4707 u32 fence_val, tmp, timeout_usec; 4708 dma_addr_t fence_dma_addr; 4709 u32 *fence_ptr; 4710 int rc; 4711 4712 if (hdev->pldm) 4713 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4714 else 4715 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4716 4717 fence_val = GAUDI_QMAN0_FENCE_VAL; 4718 4719 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4720 if (!fence_ptr) { 4721 dev_err(hdev->dev, 4722 "Failed to allocate memory for H/W queue %d testing\n", 4723 hw_queue_id); 4724 return -ENOMEM; 4725 } 4726 4727 *fence_ptr = 0; 4728 4729 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4730 &pkt_dma_addr); 4731 if (!fence_pkt) { 4732 dev_err(hdev->dev, 4733 "Failed to allocate packet for H/W queue %d testing\n", 4734 hw_queue_id); 4735 rc = -ENOMEM; 4736 goto free_fence_ptr; 4737 } 4738 4739 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4740 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4741 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4742 4743 fence_pkt->ctl = cpu_to_le32(tmp); 4744 fence_pkt->value = cpu_to_le32(fence_val); 4745 
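/* The test works by sending this MSG_PROT packet to the queue: when the QMAN processes it, it writes fence_val to fence_dma_addr, and the host polls fence_ptr below until that value shows up. */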
fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4746 4747 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4748 sizeof(struct packet_msg_prot), 4749 pkt_dma_addr); 4750 if (rc) { 4751 dev_err(hdev->dev, 4752 "Failed to send fence packet to H/W queue %d\n", 4753 hw_queue_id); 4754 goto free_pkt; 4755 } 4756 4757 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4758 1000, timeout_usec, true); 4759 4760 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4761 4762 if (rc == -ETIMEDOUT) { 4763 dev_err(hdev->dev, 4764 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4765 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4766 rc = -EIO; 4767 } 4768 4769 free_pkt: 4770 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4771 free_fence_ptr: 4772 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4773 return rc; 4774 } 4775 4776 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4777 { 4778 struct gaudi_device *gaudi = hdev->asic_specific; 4779 4780 /* 4781 * check capability here as send_cpu_message() won't update the result 4782 * value if no capability 4783 */ 4784 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4785 return 0; 4786 4787 return hl_fw_test_cpu_queue(hdev); 4788 } 4789 4790 static int gaudi_test_queues(struct hl_device *hdev) 4791 { 4792 int i, rc, ret_val = 0; 4793 4794 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4795 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4796 rc = gaudi_test_queue(hdev, i); 4797 if (rc) 4798 ret_val = -EINVAL; 4799 } 4800 } 4801 4802 rc = gaudi_test_cpu_queue(hdev); 4803 if (rc) 4804 ret_val = -EINVAL; 4805 4806 return ret_val; 4807 } 4808 4809 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4810 gfp_t mem_flags, dma_addr_t *dma_handle) 4811 { 4812 void *kernel_addr; 4813 4814 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4815 return NULL; 4816 4817 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4818 4819 /* Shift to the device's base physical address of host memory */ 4820 if (kernel_addr) 4821 *dma_handle += HOST_PHYS_BASE; 4822 4823 return kernel_addr; 4824 } 4825 4826 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4827 dma_addr_t dma_addr) 4828 { 4829 /* Cancel the device's base physical address of host memory */ 4830 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4831 4832 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4833 } 4834 4835 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4836 size_t size, dma_addr_t *dma_handle) 4837 { 4838 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4839 } 4840 4841 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4842 size_t size, void *vaddr) 4843 { 4844 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4845 } 4846 4847 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4848 { 4849 struct scatterlist *sg, *sg_next_iter; 4850 u32 count, dma_desc_cnt; 4851 u64 len, len_next; 4852 dma_addr_t addr, addr_next; 4853 4854 dma_desc_cnt = 0; 4855 4856 for_each_sgtable_dma_sg(sgt, sg, count) { 4857 len = sg_dma_len(sg); 4858 addr = sg_dma_address(sg); 4859 4860 if (len == 0) 4861 break; 4862 4863 while ((count + 1) < sgt->nents) { 4864 sg_next_iter = sg_next(sg); 4865 len_next = sg_dma_len(sg_next_iter); 4866 addr_next = sg_dma_address(sg_next_iter); 4867 4868 if (len_next == 0) 4869 break; 4870 4871 if ((addr + len == addr_next) && 4872 (len + len_next 
<= DMA_MAX_TRANSFER_SIZE)) { 4873 len += len_next; 4874 count++; 4875 sg = sg_next_iter; 4876 } else { 4877 break; 4878 } 4879 } 4880 4881 dma_desc_cnt++; 4882 } 4883 4884 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4885 } 4886 4887 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4888 struct hl_cs_parser *parser, 4889 struct packet_lin_dma *user_dma_pkt, 4890 u64 addr, enum dma_data_direction dir) 4891 { 4892 struct hl_userptr *userptr; 4893 int rc; 4894 4895 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4896 parser->job_userptr_list, &userptr)) 4897 goto already_pinned; 4898 4899 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4900 if (!userptr) 4901 return -ENOMEM; 4902 4903 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4904 userptr); 4905 if (rc) 4906 goto free_userptr; 4907 4908 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4909 4910 rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); 4911 if (rc) { 4912 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4913 goto unpin_memory; 4914 } 4915 4916 userptr->dma_mapped = true; 4917 userptr->dir = dir; 4918 4919 already_pinned: 4920 parser->patched_cb_size += 4921 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4922 4923 return 0; 4924 4925 unpin_memory: 4926 list_del(&userptr->job_node); 4927 hl_unpin_host_memory(hdev, userptr); 4928 free_userptr: 4929 kfree(userptr); 4930 return rc; 4931 } 4932 4933 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4934 struct hl_cs_parser *parser, 4935 struct packet_lin_dma *user_dma_pkt, 4936 bool src_in_host) 4937 { 4938 enum dma_data_direction dir; 4939 bool skip_host_mem_pin = false, user_memset; 4940 u64 addr; 4941 int rc = 0; 4942 4943 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 4944 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 4945 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 4946 4947 if (src_in_host) { 4948 if (user_memset) 4949 skip_host_mem_pin = true; 4950 4951 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 4952 dir = DMA_TO_DEVICE; 4953 addr = le64_to_cpu(user_dma_pkt->src_addr); 4954 } else { 4955 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 4956 dir = DMA_FROM_DEVICE; 4957 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4958 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4959 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4960 } 4961 4962 if (skip_host_mem_pin) 4963 parser->patched_cb_size += sizeof(*user_dma_pkt); 4964 else 4965 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 4966 addr, dir); 4967 4968 return rc; 4969 } 4970 4971 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 4972 struct hl_cs_parser *parser, 4973 struct packet_lin_dma *user_dma_pkt) 4974 { 4975 bool src_in_host = false; 4976 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4977 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4978 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4979 4980 dev_dbg(hdev->dev, "DMA packet details:\n"); 4981 dev_dbg(hdev->dev, "source == 0x%llx\n", 4982 le64_to_cpu(user_dma_pkt->src_addr)); 4983 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 4984 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 4985 4986 /* 4987 * Special handling for DMA with size 0. 
Bypass all validations 4988 * because no transactions will be done except for WR_COMP, which 4989 * is not a security issue 4990 */ 4991 if (!le32_to_cpu(user_dma_pkt->tsize)) { 4992 parser->patched_cb_size += sizeof(*user_dma_pkt); 4993 return 0; 4994 } 4995 4996 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 4997 src_in_host = true; 4998 4999 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5000 src_in_host); 5001 } 5002 5003 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5004 struct hl_cs_parser *parser, 5005 struct packet_load_and_exe *user_pkt) 5006 { 5007 u32 cfg; 5008 5009 cfg = le32_to_cpu(user_pkt->cfg); 5010 5011 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5012 dev_err(hdev->dev, 5013 "User not allowed to use Load and Execute\n"); 5014 return -EPERM; 5015 } 5016 5017 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5018 5019 return 0; 5020 } 5021 5022 static int gaudi_validate_cb(struct hl_device *hdev, 5023 struct hl_cs_parser *parser, bool is_mmu) 5024 { 5025 u32 cb_parsed_length = 0; 5026 int rc = 0; 5027 5028 parser->patched_cb_size = 0; 5029 5030 /* cb_user_size is more than 0 so loop will always be executed */ 5031 while (cb_parsed_length < parser->user_cb_size) { 5032 enum packet_id pkt_id; 5033 u16 pkt_size; 5034 struct gaudi_packet *user_pkt; 5035 5036 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5037 5038 pkt_id = (enum packet_id) ( 5039 (le64_to_cpu(user_pkt->header) & 5040 PACKET_HEADER_PACKET_ID_MASK) >> 5041 PACKET_HEADER_PACKET_ID_SHIFT); 5042 5043 if (!validate_packet_id(pkt_id)) { 5044 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5045 rc = -EINVAL; 5046 break; 5047 } 5048 5049 pkt_size = gaudi_packet_sizes[pkt_id]; 5050 cb_parsed_length += pkt_size; 5051 if (cb_parsed_length > parser->user_cb_size) { 5052 dev_err(hdev->dev, 5053 "packet 0x%x is out of CB boundary\n", pkt_id); 5054 rc = -EINVAL; 5055 break; 5056 } 5057 5058 switch (pkt_id) { 5059 case PACKET_MSG_PROT: 5060 dev_err(hdev->dev, 5061 "User not allowed to use MSG_PROT\n"); 5062 rc = -EPERM; 5063 break; 5064 5065 case PACKET_CP_DMA: 5066 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5067 rc = -EPERM; 5068 break; 5069 5070 case PACKET_STOP: 5071 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5072 rc = -EPERM; 5073 break; 5074 5075 case PACKET_WREG_BULK: 5076 dev_err(hdev->dev, 5077 "User not allowed to use WREG_BULK\n"); 5078 rc = -EPERM; 5079 break; 5080 5081 case PACKET_LOAD_AND_EXE: 5082 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5083 (struct packet_load_and_exe *) user_pkt); 5084 break; 5085 5086 case PACKET_LIN_DMA: 5087 parser->contains_dma_pkt = true; 5088 if (is_mmu) 5089 parser->patched_cb_size += pkt_size; 5090 else 5091 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5092 (struct packet_lin_dma *) user_pkt); 5093 break; 5094 5095 case PACKET_WREG_32: 5096 case PACKET_MSG_LONG: 5097 case PACKET_MSG_SHORT: 5098 case PACKET_REPEAT: 5099 case PACKET_FENCE: 5100 case PACKET_NOP: 5101 case PACKET_ARB_POINT: 5102 parser->patched_cb_size += pkt_size; 5103 break; 5104 5105 default: 5106 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5107 pkt_id); 5108 rc = -EINVAL; 5109 break; 5110 } 5111 5112 if (rc) 5113 break; 5114 } 5115 5116 /* 5117 * The new CB should have space at the end for two MSG_PROT packets: 5118 * 1. Optional NOP padding for cacheline alignment 5119 * 2. A packet that will act as a completion packet 5120 * 3. 
A packet that will generate MSI interrupt 5121 */ 5122 if (parser->completion) 5123 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5124 parser->patched_cb_size); 5125 5126 return rc; 5127 } 5128 5129 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5130 struct hl_cs_parser *parser, 5131 struct packet_lin_dma *user_dma_pkt, 5132 struct packet_lin_dma *new_dma_pkt, 5133 u32 *new_dma_pkt_size) 5134 { 5135 struct hl_userptr *userptr; 5136 struct scatterlist *sg, *sg_next_iter; 5137 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5138 u64 len, len_next; 5139 dma_addr_t dma_addr, dma_addr_next; 5140 u64 device_memory_addr, addr; 5141 enum dma_data_direction dir; 5142 struct sg_table *sgt; 5143 bool src_in_host = false; 5144 bool skip_host_mem_pin = false; 5145 bool user_memset; 5146 5147 ctl = le32_to_cpu(user_dma_pkt->ctl); 5148 5149 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5150 src_in_host = true; 5151 5152 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5153 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5154 5155 if (src_in_host) { 5156 addr = le64_to_cpu(user_dma_pkt->src_addr); 5157 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5158 dir = DMA_TO_DEVICE; 5159 if (user_memset) 5160 skip_host_mem_pin = true; 5161 } else { 5162 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5163 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5164 dir = DMA_FROM_DEVICE; 5165 } 5166 5167 if ((!skip_host_mem_pin) && 5168 (!hl_userptr_is_pinned(hdev, addr, 5169 le32_to_cpu(user_dma_pkt->tsize), 5170 parser->job_userptr_list, &userptr))) { 5171 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5172 addr, user_dma_pkt->tsize); 5173 return -EFAULT; 5174 } 5175 5176 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5177 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5178 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5179 return 0; 5180 } 5181 5182 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5183 5184 sgt = userptr->sgt; 5185 dma_desc_cnt = 0; 5186 5187 for_each_sgtable_dma_sg(sgt, sg, count) { 5188 len = sg_dma_len(sg); 5189 dma_addr = sg_dma_address(sg); 5190 5191 if (len == 0) 5192 break; 5193 5194 while ((count + 1) < sgt->nents) { 5195 sg_next_iter = sg_next(sg); 5196 len_next = sg_dma_len(sg_next_iter); 5197 dma_addr_next = sg_dma_address(sg_next_iter); 5198 5199 if (len_next == 0) 5200 break; 5201 5202 if ((dma_addr + len == dma_addr_next) && 5203 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5204 len += len_next; 5205 count++; 5206 sg = sg_next_iter; 5207 } else { 5208 break; 5209 } 5210 } 5211 5212 ctl = le32_to_cpu(user_dma_pkt->ctl); 5213 if (likely(dma_desc_cnt)) 5214 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5215 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5216 new_dma_pkt->ctl = cpu_to_le32(ctl); 5217 new_dma_pkt->tsize = cpu_to_le32(len); 5218 5219 if (dir == DMA_TO_DEVICE) { 5220 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5221 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5222 } else { 5223 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5224 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5225 } 5226 5227 if (!user_memset) 5228 device_memory_addr += len; 5229 dma_desc_cnt++; 5230 new_dma_pkt++; 5231 } 5232 5233 if (!dma_desc_cnt) { 5234 dev_err(hdev->dev, 5235 "Error of 0 SG entries when patching DMA packet\n"); 5236 return -EFAULT; 5237 } 5238 5239 /* Fix the last dma packet - wrcomp must be as user set it */ 5240 new_dma_pkt--; 5241 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5242 5243 
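/* The user's single LIN_DMA packet was expanded into one LIN_DMA packet per (possibly merged) SG entry, so the patched size is dma_desc_cnt descriptors */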
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5244 5245 return 0; 5246 } 5247 5248 static int gaudi_patch_cb(struct hl_device *hdev, 5249 struct hl_cs_parser *parser) 5250 { 5251 u32 cb_parsed_length = 0; 5252 u32 cb_patched_cur_length = 0; 5253 int rc = 0; 5254 5255 /* cb_user_size is more than 0 so loop will always be executed */ 5256 while (cb_parsed_length < parser->user_cb_size) { 5257 enum packet_id pkt_id; 5258 u16 pkt_size; 5259 u32 new_pkt_size = 0; 5260 struct gaudi_packet *user_pkt, *kernel_pkt; 5261 5262 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5263 kernel_pkt = parser->patched_cb->kernel_address + 5264 cb_patched_cur_length; 5265 5266 pkt_id = (enum packet_id) ( 5267 (le64_to_cpu(user_pkt->header) & 5268 PACKET_HEADER_PACKET_ID_MASK) >> 5269 PACKET_HEADER_PACKET_ID_SHIFT); 5270 5271 if (!validate_packet_id(pkt_id)) { 5272 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5273 rc = -EINVAL; 5274 break; 5275 } 5276 5277 pkt_size = gaudi_packet_sizes[pkt_id]; 5278 cb_parsed_length += pkt_size; 5279 if (cb_parsed_length > parser->user_cb_size) { 5280 dev_err(hdev->dev, 5281 "packet 0x%x is out of CB boundary\n", pkt_id); 5282 rc = -EINVAL; 5283 break; 5284 } 5285 5286 switch (pkt_id) { 5287 case PACKET_LIN_DMA: 5288 rc = gaudi_patch_dma_packet(hdev, parser, 5289 (struct packet_lin_dma *) user_pkt, 5290 (struct packet_lin_dma *) kernel_pkt, 5291 &new_pkt_size); 5292 cb_patched_cur_length += new_pkt_size; 5293 break; 5294 5295 case PACKET_MSG_PROT: 5296 dev_err(hdev->dev, 5297 "User not allowed to use MSG_PROT\n"); 5298 rc = -EPERM; 5299 break; 5300 5301 case PACKET_CP_DMA: 5302 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5303 rc = -EPERM; 5304 break; 5305 5306 case PACKET_STOP: 5307 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5308 rc = -EPERM; 5309 break; 5310 5311 case PACKET_WREG_32: 5312 case PACKET_WREG_BULK: 5313 case PACKET_MSG_LONG: 5314 case PACKET_MSG_SHORT: 5315 case PACKET_REPEAT: 5316 case PACKET_FENCE: 5317 case PACKET_NOP: 5318 case PACKET_ARB_POINT: 5319 case PACKET_LOAD_AND_EXE: 5320 memcpy(kernel_pkt, user_pkt, pkt_size); 5321 cb_patched_cur_length += pkt_size; 5322 break; 5323 5324 default: 5325 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5326 pkt_id); 5327 rc = -EINVAL; 5328 break; 5329 } 5330 5331 if (rc) 5332 break; 5333 } 5334 5335 return rc; 5336 } 5337 5338 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5339 struct hl_cs_parser *parser) 5340 { 5341 u64 handle; 5342 u32 patched_cb_size; 5343 struct hl_cb *user_cb; 5344 int rc; 5345 5346 /* 5347 * The new CB should have space at the end for two MSG_PROT packets: 5348 * 1. Optional NOP padding for cacheline alignment 5349 * 2. A packet that will act as a completion packet 5350 * 3. 
A packet that will generate MSI interrupt 5351 */ 5352 if (parser->completion) 5353 parser->patched_cb_size = parser->user_cb_size + 5354 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5355 else 5356 parser->patched_cb_size = parser->user_cb_size; 5357 5358 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5359 parser->patched_cb_size, false, false, 5360 &handle); 5361 5362 if (rc) { 5363 dev_err(hdev->dev, 5364 "Failed to allocate patched CB for DMA CS %d\n", 5365 rc); 5366 return rc; 5367 } 5368 5369 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5370 /* hl_cb_get should never fail */ 5371 if (!parser->patched_cb) { 5372 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5373 rc = -EFAULT; 5374 goto out; 5375 } 5376 5377 /* 5378 * We are protected from overflow because the check 5379 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5380 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5381 * 5382 * There is no option to reach here without going through that check because: 5383 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5384 * an external queue. 5385 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5386 */ 5387 memcpy(parser->patched_cb->kernel_address, 5388 parser->user_cb->kernel_address, 5389 parser->user_cb_size); 5390 5391 patched_cb_size = parser->patched_cb_size; 5392 5393 /* Validate patched CB instead of user CB */ 5394 user_cb = parser->user_cb; 5395 parser->user_cb = parser->patched_cb; 5396 rc = gaudi_validate_cb(hdev, parser, true); 5397 parser->user_cb = user_cb; 5398 5399 if (rc) { 5400 hl_cb_put(parser->patched_cb); 5401 goto out; 5402 } 5403 5404 if (patched_cb_size != parser->patched_cb_size) { 5405 dev_err(hdev->dev, "user CB size mismatch\n"); 5406 hl_cb_put(parser->patched_cb); 5407 rc = -EINVAL; 5408 goto out; 5409 } 5410 5411 out: 5412 /* 5413 * Always call cb destroy here because we still have 1 reference 5414 * to it by calling cb_get earlier. After the job will be completed, 5415 * cb_put will release it, but here we want to remove it from the 5416 * idr 5417 */ 5418 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5419 5420 return rc; 5421 } 5422 5423 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5424 struct hl_cs_parser *parser) 5425 { 5426 u64 handle; 5427 int rc; 5428 5429 rc = gaudi_validate_cb(hdev, parser, false); 5430 5431 if (rc) 5432 goto free_userptr; 5433 5434 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5435 parser->patched_cb_size, false, false, 5436 &handle); 5437 if (rc) { 5438 dev_err(hdev->dev, 5439 "Failed to allocate patched CB for DMA CS %d\n", rc); 5440 goto free_userptr; 5441 } 5442 5443 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5444 /* hl_cb_get should never fail here */ 5445 if (!parser->patched_cb) { 5446 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5447 rc = -EFAULT; 5448 goto out; 5449 } 5450 5451 rc = gaudi_patch_cb(hdev, parser); 5452 5453 if (rc) 5454 hl_cb_put(parser->patched_cb); 5455 5456 out: 5457 /* 5458 * Always call cb destroy here because we still have 1 reference 5459 * to it by calling cb_get earlier. 
After the job will be completed, 5460 * cb_put will release it, but here we want to remove it from the 5461 * idr 5462 */ 5463 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5464 5465 free_userptr: 5466 if (rc) 5467 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5468 return rc; 5469 } 5470 5471 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5472 struct hl_cs_parser *parser) 5473 { 5474 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5475 struct gaudi_device *gaudi = hdev->asic_specific; 5476 u32 nic_queue_offset, nic_mask_q_id; 5477 5478 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5479 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5480 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5481 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5482 5483 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5484 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5485 return -EINVAL; 5486 } 5487 } 5488 5489 /* For internal queue jobs just check if CB address is valid */ 5490 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5491 parser->user_cb_size, 5492 asic_prop->sram_user_base_address, 5493 asic_prop->sram_end_address)) 5494 return 0; 5495 5496 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5497 parser->user_cb_size, 5498 asic_prop->dram_user_base_address, 5499 asic_prop->dram_end_address)) 5500 return 0; 5501 5502 /* PMMU and HPMMU addresses are equal, check only one of them */ 5503 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5504 parser->user_cb_size, 5505 asic_prop->pmmu.start_addr, 5506 asic_prop->pmmu.end_addr)) 5507 return 0; 5508 5509 dev_err(hdev->dev, 5510 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5511 parser->user_cb, parser->user_cb_size); 5512 5513 return -EFAULT; 5514 } 5515 5516 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5517 { 5518 struct gaudi_device *gaudi = hdev->asic_specific; 5519 5520 if (parser->queue_type == QUEUE_TYPE_INT) 5521 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5522 5523 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5524 return gaudi_parse_cb_mmu(hdev, parser); 5525 else 5526 return gaudi_parse_cb_no_mmu(hdev, parser); 5527 } 5528 5529 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5530 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5531 u32 msi_vec, bool eb) 5532 { 5533 struct packet_msg_prot *cq_pkt; 5534 struct packet_nop *cq_padding; 5535 u64 msi_addr; 5536 u32 tmp; 5537 5538 cq_padding = kernel_address + original_len; 5539 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5540 5541 while ((void *)cq_padding < (void *)cq_pkt) { 5542 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5543 cq_padding++; 5544 } 5545 5546 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5547 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5548 5549 if (eb) 5550 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5551 5552 cq_pkt->ctl = cpu_to_le32(tmp); 5553 cq_pkt->value = cpu_to_le32(cq_val); 5554 cq_pkt->addr = cpu_to_le64(cq_addr); 5555 5556 cq_pkt++; 5557 5558 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5559 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5560 cq_pkt->ctl = cpu_to_le32(tmp); 5561 cq_pkt->value = cpu_to_le32(1); 5562 msi_addr = hdev->pdev ? 
mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4; 5563 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5564 } 5565 5566 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5567 { 5568 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5569 } 5570 5571 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5572 u32 size, u64 val) 5573 { 5574 struct packet_lin_dma *lin_dma_pkt; 5575 struct hl_cs_job *job; 5576 u32 cb_size, ctl, err_cause; 5577 struct hl_cb *cb; 5578 int rc; 5579 5580 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5581 if (!cb) 5582 return -EFAULT; 5583 5584 lin_dma_pkt = cb->kernel_address; 5585 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5586 cb_size = sizeof(*lin_dma_pkt); 5587 5588 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5589 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5590 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5591 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5592 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5593 5594 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5595 lin_dma_pkt->src_addr = cpu_to_le64(val); 5596 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5597 lin_dma_pkt->tsize = cpu_to_le32(size); 5598 5599 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5600 if (!job) { 5601 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5602 rc = -ENOMEM; 5603 goto release_cb; 5604 } 5605 5606 /* Verify DMA is OK */ 5607 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5608 if (err_cause && !hdev->init_done) { 5609 dev_dbg(hdev->dev, 5610 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5611 err_cause); 5612 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5613 } 5614 5615 job->id = 0; 5616 job->user_cb = cb; 5617 atomic_inc(&job->user_cb->cs_cnt); 5618 job->user_cb_size = cb_size; 5619 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5620 job->patched_cb = job->user_cb; 5621 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5622 5623 hl_debugfs_add_job(hdev, job); 5624 5625 rc = gaudi_send_job_on_qman0(hdev, job); 5626 hl_debugfs_remove_job(hdev, job); 5627 kfree(job); 5628 atomic_dec(&cb->cs_cnt); 5629 5630 /* Verify DMA is OK */ 5631 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5632 if (err_cause) { 5633 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5634 rc = -EIO; 5635 if (!hdev->init_done) { 5636 dev_dbg(hdev->dev, 5637 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5638 err_cause); 5639 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5640 } 5641 } 5642 5643 release_cb: 5644 hl_cb_put(cb); 5645 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5646 5647 return rc; 5648 } 5649 5650 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5651 u32 num_regs, u32 val) 5652 { 5653 struct packet_msg_long *pkt; 5654 struct hl_cs_job *job; 5655 u32 cb_size, ctl; 5656 struct hl_cb *cb; 5657 int i, rc; 5658 5659 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5660 5661 if (cb_size > SZ_2M) { 5662 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5663 return -ENOMEM; 5664 } 5665 5666 cb = hl_cb_kernel_create(hdev, cb_size, false); 5667 if (!cb) 5668 return -EFAULT; 5669 5670 pkt = cb->kernel_address; 5671 5672 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5673 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5674 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5675 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5676 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5677 5678 for (i = 0; i < num_regs ; i++, pkt++) { 
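/*
 * Each iteration emits one MSG_LONG packet that writes 'val' to a single
 * register; the target address advances by 4 bytes per packet, so the
 * num_regs consecutive registers starting at reg_base are all covered.
 */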
5679 pkt->ctl = cpu_to_le32(ctl); 5680 pkt->value = cpu_to_le32(val); 5681 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5682 } 5683 5684 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5685 if (!job) { 5686 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5687 rc = -ENOMEM; 5688 goto release_cb; 5689 } 5690 5691 job->id = 0; 5692 job->user_cb = cb; 5693 atomic_inc(&job->user_cb->cs_cnt); 5694 job->user_cb_size = cb_size; 5695 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5696 job->patched_cb = job->user_cb; 5697 job->job_cb_size = cb_size; 5698 5699 hl_debugfs_add_job(hdev, job); 5700 5701 rc = gaudi_send_job_on_qman0(hdev, job); 5702 hl_debugfs_remove_job(hdev, job); 5703 kfree(job); 5704 atomic_dec(&cb->cs_cnt); 5705 5706 release_cb: 5707 hl_cb_put(cb); 5708 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5709 5710 return rc; 5711 } 5712 5713 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5714 { 5715 u64 base_addr; 5716 u32 num_regs; 5717 int rc; 5718 5719 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5720 num_regs = NUM_OF_SOB_IN_BLOCK; 5721 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5722 if (rc) { 5723 dev_err(hdev->dev, "failed resetting SM registers"); 5724 return -ENOMEM; 5725 } 5726 5727 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5728 num_regs = NUM_OF_SOB_IN_BLOCK; 5729 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5730 if (rc) { 5731 dev_err(hdev->dev, "failed resetting SM registers"); 5732 return -ENOMEM; 5733 } 5734 5735 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5736 num_regs = NUM_OF_SOB_IN_BLOCK; 5737 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5738 if (rc) { 5739 dev_err(hdev->dev, "failed resetting SM registers"); 5740 return -ENOMEM; 5741 } 5742 5743 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5744 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5745 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5746 if (rc) { 5747 dev_err(hdev->dev, "failed resetting SM registers"); 5748 return -ENOMEM; 5749 } 5750 5751 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5752 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5753 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5754 if (rc) { 5755 dev_err(hdev->dev, "failed resetting SM registers"); 5756 return -ENOMEM; 5757 } 5758 5759 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5760 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5761 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5762 if (rc) { 5763 dev_err(hdev->dev, "failed resetting SM registers"); 5764 return -ENOMEM; 5765 } 5766 5767 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5768 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5769 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5770 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5771 if (rc) { 5772 dev_err(hdev->dev, "failed resetting SM registers"); 5773 return -ENOMEM; 5774 } 5775 5776 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5777 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5778 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5779 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5780 if (rc) { 5781 dev_err(hdev->dev, "failed resetting SM registers"); 5782 return -ENOMEM; 5783 } 5784 5785 return 0; 5786 } 5787 5788 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5789 { 5790 u32 sob_delta = 
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5791 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5792 int i; 5793 5794 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5795 u64 sob_addr = CFG_BASE + 5796 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5797 (i * sob_delta); 5798 u32 dma_offset = i * DMA_CORE_OFFSET; 5799 5800 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5801 lower_32_bits(sob_addr)); 5802 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5803 upper_32_bits(sob_addr)); 5804 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5805 5806 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5807 * modified by the user for SRAM reduction 5808 */ 5809 if (i > 1) 5810 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5811 0x00000001); 5812 } 5813 } 5814 5815 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5816 { 5817 u32 qman_offset; 5818 int i; 5819 5820 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5821 qman_offset = i * DMA_QMAN_OFFSET; 5822 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5823 } 5824 5825 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5826 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5827 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5828 } 5829 5830 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5831 qman_offset = i * TPC_QMAN_OFFSET; 5832 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5833 } 5834 5835 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5836 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5837 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5838 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5839 } 5840 } 5841 5842 static int gaudi_restore_user_registers(struct hl_device *hdev) 5843 { 5844 int rc; 5845 5846 rc = gaudi_restore_sm_registers(hdev); 5847 if (rc) 5848 return rc; 5849 5850 gaudi_restore_dma_registers(hdev); 5851 gaudi_restore_qm_registers(hdev); 5852 5853 return 0; 5854 } 5855 5856 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5857 { 5858 return 0; 5859 } 5860 5861 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5862 { 5863 u32 size = hdev->asic_prop.mmu_pgt_size + 5864 hdev->asic_prop.mmu_cache_mng_size; 5865 struct gaudi_device *gaudi = hdev->asic_specific; 5866 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5867 5868 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5869 return 0; 5870 5871 return gaudi_memset_device_memory(hdev, addr, size, 0); 5872 } 5873 5874 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5875 { 5876 5877 } 5878 5879 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5880 u32 size_to_dma, dma_addr_t dma_addr) 5881 { 5882 u32 err_cause, val; 5883 u64 dma_offset; 5884 int rc; 5885 5886 dma_offset = dma_id * DMA_CORE_OFFSET; 5887 5888 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5889 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5890 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5891 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5892 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5893 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5894 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5895 5896 rc = hl_poll_timeout( 5897 hdev, 5898 mmDMA0_CORE_STS0 + dma_offset, 5899 val, 5900 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5901 0, 5902 1000000); 5903 5904 if (rc) { 5905 dev_err(hdev->dev, 5906 "DMA %d timed-out during reading of 0x%llx\n", 5907 dma_id, addr); 5908 return -EIO; 5909 } 5910 5911 /* Verify DMA is OK */ 5912 err_cause = 
RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5913 if (err_cause) { 5914 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5915 dev_dbg(hdev->dev, 5916 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5917 err_cause); 5918 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5919 5920 return -EIO; 5921 } 5922 5923 return 0; 5924 } 5925 5926 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5927 void *blob_addr) 5928 { 5929 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 5930 u32 qm_glbl_sts0, qm_cgm_sts; 5931 u64 dma_offset, qm_offset; 5932 dma_addr_t dma_addr; 5933 void *kernel_addr; 5934 bool is_eng_idle; 5935 int rc = 0, dma_id; 5936 5937 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 5938 5939 if (!kernel_addr) 5940 return -ENOMEM; 5941 5942 hdev->asic_funcs->hw_queues_lock(hdev); 5943 5944 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 5945 dma_offset = dma_id * DMA_CORE_OFFSET; 5946 qm_offset = dma_id * DMA_QMAN_OFFSET; 5947 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5948 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5949 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5950 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5951 IS_DMA_IDLE(dma_core_sts0); 5952 5953 if (!is_eng_idle) { 5954 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 5955 dma_offset = dma_id * DMA_CORE_OFFSET; 5956 qm_offset = dma_id * DMA_QMAN_OFFSET; 5957 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5958 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5959 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5960 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5961 IS_DMA_IDLE(dma_core_sts0); 5962 5963 if (!is_eng_idle) { 5964 dev_err_ratelimited(hdev->dev, 5965 "Can't read via DMA because it is BUSY\n"); 5966 rc = -EAGAIN; 5967 goto out; 5968 } 5969 } 5970 5971 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 5972 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 5973 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 5974 5975 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 5976 * using the compute ctx ASID, if exists. If not, use the kernel ctx 5977 * ASID 5978 */ 5979 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 5980 5981 /* Verify DMA is OK */ 5982 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5983 if (err_cause) { 5984 dev_dbg(hdev->dev, 5985 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5986 err_cause); 5987 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5988 } 5989 5990 pos = 0; 5991 size_left = size; 5992 size_to_dma = SZ_2M; 5993 5994 while (size_left > 0) { 5995 5996 if (size_left < SZ_2M) 5997 size_to_dma = size_left; 5998 5999 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6000 dma_addr); 6001 if (rc) 6002 break; 6003 6004 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6005 6006 if (size_left <= SZ_2M) 6007 break; 6008 6009 pos += SZ_2M; 6010 addr += SZ_2M; 6011 size_left -= SZ_2M; 6012 } 6013 6014 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6015 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6016 * ASID 6017 */ 6018 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6019 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6020 6021 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6022 6023 out: 6024 hdev->asic_funcs->hw_queues_unlock(hdev); 6025 6026 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6027 6028 return rc; 6029 } 6030 6031 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6032 { 6033 struct gaudi_device *gaudi = hdev->asic_specific; 6034 6035 if (hdev->reset_info.hard_reset_pending) 6036 return U64_MAX; 6037 6038 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6039 (addr - gaudi->hbm_bar_cur_addr)); 6040 } 6041 6042 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6043 { 6044 struct gaudi_device *gaudi = hdev->asic_specific; 6045 6046 if (hdev->reset_info.hard_reset_pending) 6047 return; 6048 6049 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6050 (addr - gaudi->hbm_bar_cur_addr)); 6051 } 6052 6053 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6054 { 6055 /* mask to zero the MMBP and ASID bits */ 6056 WREG32_AND(reg, ~0x7FF); 6057 WREG32_OR(reg, asid); 6058 } 6059 6060 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6061 { 6062 struct gaudi_device *gaudi = hdev->asic_specific; 6063 6064 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6065 return; 6066 6067 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6068 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6069 return; 6070 } 6071 6072 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6073 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6074 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6075 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6076 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6077 6078 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6079 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6080 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6081 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6082 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6083 6084 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6085 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6086 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6087 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6088 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6089 6090 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6091 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6092 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6093 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6094 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6095 6096 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6097 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6098 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6099 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6100 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6101 6102 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6103 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6104 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6105 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6106 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6107 6108 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6109 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6110 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6111 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6112 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6113 6114 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6115 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6116 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6117 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6118 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6119 6120 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6121 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6122 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6123 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6124 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6125 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6126 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6127 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6128 6129 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6130 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6131 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6132 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6133 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6134 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6135 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6136 6137 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6138 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6139 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6140 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6141 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6142 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6143 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6144 6145 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6146 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6147 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6148 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6149 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6150 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6152 6153 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6155 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6156 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6158 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6159 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6160 6161 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6162 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6167 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6168 6169 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6173 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6174 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6176 6177 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6179 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6182 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6183 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6184 6185 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6186 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6191 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6192 6193 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6194 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6195 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6197 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6198 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6200 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6203 6204 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6206 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6207 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6212 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6213 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6214 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6215 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6216 6217 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6218 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6219 asid); 6220 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6221 asid); 6222 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6223 asid); 6224 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6225 asid); 6226 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6227 asid); 6228 } 6229 6230 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6231 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6232 asid); 6233 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6234 asid); 6235 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6236 asid); 6237 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6238 asid); 6239 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6240 asid); 6241 } 6242 6243 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6244 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6245 asid); 6246 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6247 asid); 6248 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6249 asid); 6250 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6251 asid); 6252 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6253 asid); 6254 } 6255 6256 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6257 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6258 asid); 6259 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6260 asid); 6261 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6262 asid); 6263 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6264 asid); 6265 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6266 asid); 6267 } 6268 6269 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6270 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6271 asid); 6272 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6273 asid); 6274 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6275 asid); 6276 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6277 asid); 6278 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6279 asid); 6280 } 6281 6282 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6283 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6284 asid); 6285 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6286 asid); 6287 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6288 asid); 6289 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6290 asid); 6291 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6292 asid); 6293 } 6294 6295 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6296 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6297 asid); 6298 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6299 asid); 6300 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6301 asid); 6302 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6303 asid); 6304 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6305 asid); 6306 } 6307 6308 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6309 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6310 asid); 6311 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6312 asid); 6313 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6314 asid); 6315 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6316 asid); 6317 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6318 asid); 6319 } 6320 6321 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6322 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6323 asid); 6324 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6325 asid); 6326 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6327 asid); 6328 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6329 asid); 6330 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6331 asid); 6332 } 6333 6334 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6335 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6336 asid); 6337 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6338 asid); 6339 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6340 asid); 6341 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6342 asid); 6343 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6344 asid); 6345 } 6346 6347 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6348 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6349 } 6350 6351 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6352 struct hl_cs_job *job) 6353 { 6354 struct packet_msg_prot *fence_pkt; 6355 u32 *fence_ptr; 6356 dma_addr_t fence_dma_addr; 6357 struct hl_cb *cb; 6358 u32 tmp, timeout, dma_offset; 6359 int rc; 6360 6361 if (hdev->pldm) 6362 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6363 else 6364 timeout = HL_DEVICE_TIMEOUT_USEC; 6365 6366 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6367 if (!fence_ptr) { 6368 dev_err(hdev->dev, 6369 "Failed to allocate fence memory for QMAN0\n"); 6370 return -ENOMEM; 6371 } 6372 6373 cb = job->patched_cb; 6374 6375 fence_pkt = cb->kernel_address + 6376 job->job_cb_size - sizeof(struct packet_msg_prot); 6377 6378 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6379 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6380 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6381 6382 fence_pkt->ctl = cpu_to_le32(tmp); 6383 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6384 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6385 6386 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6387 6388 WREG32(mmDMA0_CORE_PROT + dma_offset, 6389 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6390 6391 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6392 job->job_cb_size, cb->bus_address); 6393 if (rc) { 6394 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6395 goto free_fence_ptr; 6396 } 6397 6398 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6399 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6400 timeout, true); 6401 6402 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6403 6404 if (rc == -ETIMEDOUT) { 6405 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6406 goto free_fence_ptr; 6407 } 6408 6409 free_fence_ptr: 6410 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6411 6412 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6413 return rc; 6414 } 6415 6416 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6417 { 6418 if (event_type >= GAUDI_EVENT_SIZE) 6419 goto event_not_supported; 6420 6421 if (!gaudi_irq_map_table[event_type].valid) 6422 goto 
event_not_supported; 6423 6424 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6425 6426 return; 6427 6428 event_not_supported: 6429 snprintf(desc, size, "N/A"); 6430 } 6431 6432 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6433 bool is_write, u16 *engine_id_1, 6434 u16 *engine_id_2) 6435 { 6436 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6437 6438 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6439 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6440 6441 switch (x_y) { 6442 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6443 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6444 dma_id[0] = 0; 6445 dma_id[1] = 2; 6446 break; 6447 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6448 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6449 dma_id[0] = 1; 6450 dma_id[1] = 3; 6451 break; 6452 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6453 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6454 dma_id[0] = 4; 6455 dma_id[1] = 6; 6456 break; 6457 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6458 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6459 dma_id[0] = 5; 6460 dma_id[1] = 7; 6461 break; 6462 default: 6463 goto unknown_initiator; 6464 } 6465 6466 for (i = 0 ; i < 2 ; i++) { 6467 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6468 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6469 } 6470 6471 switch (x_y) { 6472 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6473 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6474 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6475 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6476 return "DMA0"; 6477 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6478 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6479 return "DMA2"; 6480 } else { 6481 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6482 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6483 return "DMA0 or DMA2"; 6484 } 6485 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6486 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6487 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6488 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6489 return "DMA1"; 6490 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6491 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6492 return "DMA3"; 6493 } else { 6494 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6495 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6496 return "DMA1 or DMA3"; 6497 } 6498 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6499 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6500 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6501 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6502 return "DMA4"; 6503 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6504 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6505 return "DMA6"; 6506 } else { 6507 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6508 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6509 return "DMA4 or DMA6"; 6510 } 6511 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6512 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6513 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6514 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6515 return "DMA5"; 6516 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6517 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6518 return "DMA7"; 6519 } else { 6520 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6521 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6522 return "DMA5 or DMA7"; 6523 } 6524 } 6525 6526 unknown_initiator: 6527 return "unknown initiator"; 6528 } 6529 6530 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6531 u16 *engine_id_1, u16 *engine_id_2) 6532 { 6533 u32 val, x_y, axi_id; 6534 6535 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6536 RREG32(mmMMU_UP_RAZWI_READ_ID); 6537 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6538 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6539 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6540 RAZWI_INITIATOR_AXI_ID_SHIFT); 6541 6542 switch (x_y) { 6543 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6544 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6545 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6546 return "TPC0"; 6547 } 6548 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6549 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6550 return "NIC0"; 6551 } 6552 break; 6553 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6554 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6555 return "TPC1"; 6556 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6557 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6558 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6559 return "MME0"; 6560 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6561 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6562 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6563 return "MME1"; 6564 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6565 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6566 return "TPC2"; 6567 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6568 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6569 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6570 return "TPC3"; 6571 } 6572 /* PCI, CPU or PSOC does not have engine id*/ 6573 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6574 return "PCI"; 6575 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6576 return "CPU"; 6577 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6578 return "PSOC"; 6579 break; 6580 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6581 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6582 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6583 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6584 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6585 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6586 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6587 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6588 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6589 engine_id_1, engine_id_2); 6590 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6591 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6592 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6593 return "TPC4"; 6594 } 6595 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6596 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6597 return "NIC1"; 6598 } 6599 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6600 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6601 return "NIC2"; 6602 } 6603 break; 6604 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6605 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6606 return "TPC5"; 6607 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6608 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6609 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6610 return "MME2"; 6611 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6612 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6613 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6614 return "MME3"; 6615 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6616 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6617 return "TPC6"; 6618 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6619 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6620 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6621 return "TPC7"; 6622 } 6623 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6624 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6625 return "NIC4"; 6626 } 6627 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6628 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6629 return "NIC5"; 6630 } 6631 break; 6632 default: 6633 break; 6634 } 6635 6636 dev_err(hdev->dev, 6637 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6638 val, 6639 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6640 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6641 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6642 RAZWI_INITIATOR_AXI_ID_MASK); 6643 6644 return "unknown initiator"; 6645 } 6646 6647 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6648 u16 *engine_id_2, bool *is_read, bool *is_write) 6649 { 6650 6651 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6652 dev_err_ratelimited(hdev->dev, 6653 "RAZWI event caused by illegal write of %s\n", 6654 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6655 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6656 *is_write = true; 6657 } 6658 6659 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6660 dev_err_ratelimited(hdev->dev, 6661 "RAZWI event caused by illegal read of %s\n", 6662 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6663 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6664 *is_read = true; 6665 } 6666 } 6667 6668 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6669 { 6670 struct gaudi_device *gaudi = hdev->asic_specific; 6671 u32 val; 6672 6673 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6674 return; 6675 6676 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6677 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6678 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6679 *addr <<= 32; 6680 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6681 6682 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6683 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6684 6685 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6686 } 6687 6688 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6689 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6690 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6691 *addr <<= 32; 6692 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6693 6694 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6695 6696 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6697 } 6698 } 6699 6700 /* 6701 * +-------------------+------------------------------------------------------+ 6702 * | Configuration Reg | Description | 6703 * | Address | | 6704 * +-------------------+------------------------------------------------------+ 6705 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6706 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6707 * | |0xF34 memory wrappers 63:32 | 6708 * | |0xF38 memory wrappers 95:64 | 6709 * | |0xF3C memory wrappers 127:96 | 6710 * +-------------------+------------------------------------------------------+ 6711 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6712 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6713 * | |0xF44 memory wrappers 63:32 | 6714 * | |0xF48 memory wrappers 95:64 | 6715 * | |0xF4C memory wrappers 127:96 | 6716 * +-------------------+------------------------------------------------------+ 6717 */ 6718 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6719 struct ecc_info_extract_params *params, u64 *ecc_address, 6720 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6721 { 6722 u32 i, num_mem_regs, reg, err_bit; 6723 u64 err_addr, err_word = 0; 6724 6725 num_mem_regs = params->num_memories / 32 + 6726 ((params->num_memories % 32) ? 
1 : 0); 6727 6728 if (params->block_address >= CFG_BASE) 6729 params->block_address -= CFG_BASE; 6730 6731 if (params->derr) 6732 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6733 else 6734 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6735 6736 /* Set invalid wrapper index */ 6737 *memory_wrapper_idx = 0xFF; 6738 6739 /* Iterate through memory wrappers, a single bit must be set */ 6740 for (i = 0 ; i < num_mem_regs ; i++) { 6741 err_addr += i * 4; 6742 err_word = RREG32(err_addr); 6743 if (err_word) { 6744 err_bit = __ffs(err_word); 6745 *memory_wrapper_idx = err_bit + (32 * i); 6746 break; 6747 } 6748 } 6749 6750 if (*memory_wrapper_idx == 0xFF) { 6751 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6752 return -EINVAL; 6753 } 6754 6755 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6756 *memory_wrapper_idx); 6757 6758 *ecc_address = 6759 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6760 *ecc_syndrom = 6761 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6762 6763 /* Clear error indication */ 6764 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6765 if (params->derr) 6766 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6767 else 6768 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6769 6770 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6771 6772 return 0; 6773 } 6774 6775 /* 6776 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6777 * 6778 * @idx: the current pi/ci value 6779 * @q_len: the queue length (power of 2) 6780 * 6781 * @return the cyclically decremented index 6782 */ 6783 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6784 { 6785 u32 mask = q_len - 1; 6786 6787 /* 6788 * modular decrement is equivalent to adding (queue_size -1) 6789 * later we take LSBs to make sure the value is in the 6790 * range [0, queue_len - 1] 6791 */ 6792 return (idx + q_len - 1) & mask; 6793 } 6794 6795 /** 6796 * gaudi_handle_sw_config_stream_data - print SW config stream data 6797 * 6798 * @hdev: pointer to the habanalabs device structure 6799 * @stream: the QMAN's stream 6800 * @qman_base: base address of QMAN registers block 6801 * @event_mask: mask of the last events occurred 6802 */ 6803 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6804 u64 qman_base, u64 event_mask) 6805 { 6806 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6807 u32 cq_ptr_lo_off, size; 6808 6809 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6810 6811 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6812 stream * cq_ptr_lo_off; 6813 cq_ptr_hi = cq_ptr_lo + 6814 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6815 cq_tsize = cq_ptr_lo + 6816 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6817 6818 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6819 size = RREG32(cq_tsize); 6820 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6821 stream, cq_ptr, size); 6822 6823 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6824 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6825 hdev->captured_err_info.undef_opcode.cq_size = size; 6826 hdev->captured_err_info.undef_opcode.stream_id = stream; 6827 } 6828 } 6829 6830 /** 6831 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6832 * 6833 * @hdev: pointer to the habanalabs device structure 6834 * @qid_base: first QID of the QMAN (out of 4 streams) 6835 * @stream: the QMAN's stream 6836 * @qman_base: 
base address of QMAN registers block 6837 * @event_mask: mask of the last events occurred 6838 * @pr_sw_conf: if true, print the SW config stream data (CQ PTR and SIZE) 6839 */ 6840 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6841 u32 stream, u64 qman_base, 6842 u64 event_mask, 6843 bool pr_sw_conf) 6844 { 6845 u32 ci, qm_ci_stream_off, queue_len; 6846 struct hl_hw_queue *q; 6847 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6848 int i; 6849 6850 q = &hdev->kernel_queues[qid_base + stream]; 6851 6852 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6853 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6854 stream * qm_ci_stream_off; 6855 6856 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6857 q->int_queue_len : HL_QUEUE_LENGTH; 6858 6859 hdev->asic_funcs->hw_queues_lock(hdev); 6860 6861 if (pr_sw_conf) 6862 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6863 6864 ci = RREG32(pq_ci); 6865 6866 /* we should start printing from ci - 1 */ 6867 ci = gaudi_queue_idx_dec(ci, queue_len); 6868 memset(addr, 0, sizeof(addr)); 6869 6870 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6871 struct hl_bd *bd; 6872 u32 len; 6873 6874 bd = q->kernel_address; 6875 bd += ci; 6876 6877 len = le32_to_cpu(bd->len); 6878 /* len 0 means uninitialized entry - break */ 6879 if (!len) 6880 break; 6881 6882 addr[i] = le64_to_cpu(bd->ptr); 6883 6884 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6885 stream, ci, addr[i], len); 6886 6887 /* get previous ci, wrap if needed */ 6888 ci = gaudi_queue_idx_dec(ci, queue_len); 6889 } 6890 6891 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6892 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6893 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6894 6895 if (arr_idx == 0) { 6896 undef_opcode->timestamp = ktime_get(); 6897 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6898 } 6899 6900 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6901 undef_opcode->cb_addr_streams_len++; 6902 } 6903 6904 hdev->asic_funcs->hw_queues_unlock(hdev); 6905 } 6906 6907 /** 6908 * handle_qman_data_on_err - extract QMAN data on error 6909 * 6910 * @hdev: pointer to the habanalabs device structure 6911 * @qid_base: first QID of the QMAN (out of 4 streams) 6912 * @stream: the QMAN's stream 6913 * @qman_base: base address of QMAN registers block 6914 * @event_mask: mask of the last events occurred 6915 * 6916 * This function attempts to extract as much data as possible on a QMAN error. 6917 * On upper CP print the SW config stream data and last 8 PQEs.
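 * (each PQE line printed by gaudi_handle_last_pqes_on_err shows the stream, the consumer index, the BD address and the BD size)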
6918 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6919 */ 6920 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6921 u32 stream, u64 qman_base, u64 event_mask) 6922 { 6923 u32 i; 6924 6925 if (stream != QMAN_STREAMS) { 6926 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 6927 qman_base, event_mask, true); 6928 return; 6929 } 6930 6931 /* handle Lower-CP */ 6932 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6933 6934 for (i = 0; i < QMAN_STREAMS; i++) 6935 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 6936 qman_base, event_mask, false); 6937 } 6938 6939 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 6940 const char *qm_name, 6941 u64 qman_base, 6942 u32 qid_base, 6943 u64 *event_mask) 6944 { 6945 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 6946 u64 glbl_sts_addr, arb_err_addr; 6947 char reg_desc[32]; 6948 6949 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 6950 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 6951 6952 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 6953 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 6954 glbl_sts_clr_val = 0; 6955 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 6956 6957 if (!glbl_sts_val) 6958 continue; 6959 6960 if (i == QMAN_STREAMS) 6961 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 6962 else 6963 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 6964 6965 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 6966 if (glbl_sts_val & BIT(j)) { 6967 dev_err_ratelimited(hdev->dev, 6968 "%s %s. err cause: %s\n", 6969 qm_name, reg_desc, 6970 gaudi_qman_error_cause[j]); 6971 glbl_sts_clr_val |= BIT(j); 6972 } 6973 } 6974 /* check for undefined opcode */ 6975 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 6976 hdev->captured_err_info.undef_opcode.write_enable) { 6977 memset(&hdev->captured_err_info.undef_opcode, 0, 6978 sizeof(hdev->captured_err_info.undef_opcode)); 6979 6980 hdev->captured_err_info.undef_opcode.write_enable = false; 6981 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 6982 } 6983 6984 /* Write 1 clear errors */ 6985 if (!hdev->stop_on_err) 6986 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 6987 else 6988 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 6989 } 6990 6991 arb_err_val = RREG32(arb_err_addr); 6992 6993 if (!arb_err_val) 6994 return; 6995 6996 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 6997 if (arb_err_val & BIT(j)) { 6998 dev_err_ratelimited(hdev->dev, 6999 "%s ARB_ERR. 
err cause: %s\n", 7000 qm_name, 7001 gaudi_qman_arb_error_cause[j]); 7002 } 7003 } 7004 } 7005 7006 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7007 struct hl_eq_sm_sei_data *sei_data) 7008 { 7009 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7010 7011 /* Flip the bits as the enum is ordered in the opposite way */ 7012 index = (index ^ 0x3) & 0x3; 7013 7014 switch (sei_data->sei_cause) { 7015 case SM_SEI_SO_OVERFLOW: 7016 dev_err_ratelimited(hdev->dev, 7017 "%s SEI Error: SOB Group %u overflow/underflow", 7018 gaudi_sync_manager_names[index], 7019 le32_to_cpu(sei_data->sei_log)); 7020 break; 7021 case SM_SEI_LBW_4B_UNALIGNED: 7022 dev_err_ratelimited(hdev->dev, 7023 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7024 gaudi_sync_manager_names[index], 7025 le32_to_cpu(sei_data->sei_log)); 7026 break; 7027 case SM_SEI_AXI_RESPONSE_ERR: 7028 dev_err_ratelimited(hdev->dev, 7029 "%s SEI Error: AXI ID %u response error", 7030 gaudi_sync_manager_names[index], 7031 le32_to_cpu(sei_data->sei_log)); 7032 break; 7033 default: 7034 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7035 le32_to_cpu(sei_data->sei_log)); 7036 break; 7037 } 7038 } 7039 7040 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7041 struct hl_eq_ecc_data *ecc_data) 7042 { 7043 struct ecc_info_extract_params params; 7044 u64 ecc_address = 0, ecc_syndrom = 0; 7045 u8 index, memory_wrapper_idx = 0; 7046 bool extract_info_from_fw; 7047 int rc; 7048 7049 if (hdev->asic_prop.fw_security_enabled) { 7050 extract_info_from_fw = true; 7051 goto extract_ecc_info; 7052 } 7053 7054 switch (event_type) { 7055 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7056 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7057 extract_info_from_fw = true; 7058 break; 7059 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7060 index = event_type - GAUDI_EVENT_TPC0_SERR; 7061 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7062 params.num_memories = 90; 7063 params.derr = false; 7064 extract_info_from_fw = false; 7065 break; 7066 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7067 index = event_type - GAUDI_EVENT_TPC0_DERR; 7068 params.block_address = 7069 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7070 params.num_memories = 90; 7071 params.derr = true; 7072 extract_info_from_fw = false; 7073 break; 7074 case GAUDI_EVENT_MME0_ACC_SERR: 7075 case GAUDI_EVENT_MME1_ACC_SERR: 7076 case GAUDI_EVENT_MME2_ACC_SERR: 7077 case GAUDI_EVENT_MME3_ACC_SERR: 7078 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7079 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7080 params.num_memories = 128; 7081 params.derr = false; 7082 extract_info_from_fw = false; 7083 break; 7084 case GAUDI_EVENT_MME0_ACC_DERR: 7085 case GAUDI_EVENT_MME1_ACC_DERR: 7086 case GAUDI_EVENT_MME2_ACC_DERR: 7087 case GAUDI_EVENT_MME3_ACC_DERR: 7088 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7089 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7090 params.num_memories = 128; 7091 params.derr = true; 7092 extract_info_from_fw = false; 7093 break; 7094 case GAUDI_EVENT_MME0_SBAB_SERR: 7095 case GAUDI_EVENT_MME1_SBAB_SERR: 7096 case GAUDI_EVENT_MME2_SBAB_SERR: 7097 case GAUDI_EVENT_MME3_SBAB_SERR: 7098 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7099 params.block_address = 7100 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7101 params.num_memories = 33; 7102 params.derr = false; 7103 extract_info_from_fw = false; 7104 break; 7105 case GAUDI_EVENT_MME0_SBAB_DERR: 7106 case GAUDI_EVENT_MME1_SBAB_DERR: 7107 case GAUDI_EVENT_MME2_SBAB_DERR: 7108 case GAUDI_EVENT_MME3_SBAB_DERR: 7109 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7110 params.block_address = 7111 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7112 params.num_memories = 33; 7113 params.derr = true; 7114 extract_info_from_fw = false; 7115 break; 7116 default: 7117 return; 7118 } 7119 7120 extract_ecc_info: 7121 if (extract_info_from_fw) { 7122 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7123 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7124 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7125 } else { 7126 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7127 &ecc_syndrom, &memory_wrapper_idx); 7128 if (rc) 7129 return; 7130 } 7131 7132 dev_err(hdev->dev, 7133 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7134 ecc_address, ecc_syndrom, memory_wrapper_idx); 7135 } 7136 7137 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7138 { 7139 u64 qman_base; 7140 char desc[32]; 7141 u32 qid_base; 7142 u8 index; 7143 7144 switch (event_type) { 7145 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7146 index = event_type - GAUDI_EVENT_TPC0_QM; 7147 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7148 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7149 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7150 break; 7151 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7152 if (event_type == GAUDI_EVENT_MME0_QM) { 7153 index = 0; 7154 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7155 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7156 index = 2; 7157 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7158 } 7159 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7160 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7161 break; 7162 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7163 index = event_type - GAUDI_EVENT_DMA0_QM; 7164 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7165 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7166 if (index > 1) 7167 qid_base++; 7168 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7169 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7170 break; 7171 case GAUDI_EVENT_NIC0_QM0: 7172 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7173 qman_base = mmNIC0_QM0_BASE; 7174 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7175 break; 7176 case GAUDI_EVENT_NIC0_QM1: 7177 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7178 qman_base = mmNIC0_QM1_BASE; 7179 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7180 break; 7181 case GAUDI_EVENT_NIC1_QM0: 7182 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7183 qman_base = mmNIC1_QM0_BASE; 7184 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7185 break; 7186 case GAUDI_EVENT_NIC1_QM1: 7187 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7188 qman_base = mmNIC1_QM1_BASE; 7189 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7190 break; 7191 case GAUDI_EVENT_NIC2_QM0: 7192 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7193 qman_base = mmNIC2_QM0_BASE; 7194 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7195 break; 7196 case GAUDI_EVENT_NIC2_QM1: 7197 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7198 qman_base = mmNIC2_QM1_BASE; 7199 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7200 break; 7201 case GAUDI_EVENT_NIC3_QM0: 7202 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7203 qman_base = mmNIC3_QM0_BASE; 7204 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7205 break; 7206 case GAUDI_EVENT_NIC3_QM1: 7207 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7208 qman_base = mmNIC3_QM1_BASE; 7209 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7210 break; 7211 case GAUDI_EVENT_NIC4_QM0: 7212 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7213 qman_base = mmNIC4_QM0_BASE; 7214 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7215 break; 7216 case GAUDI_EVENT_NIC4_QM1: 7217 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7218 qman_base = mmNIC4_QM1_BASE; 7219 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7220 break; 7221 default: 7222 return; 7223 } 7224 7225 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7226 } 7227 7228 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7229 bool check_razwi, u64 *event_mask) 7230 { 7231 bool is_read = false, is_write = false; 7232 u16 engine_id[2], num_of_razwi_eng = 0; 7233 char desc[64] = ""; 7234 u64 razwi_addr = 0; 7235 u8 razwi_flags = 0; 7236 7237 /* 7238 * Init engine id by default as not valid and only if razwi initiated from engine with 7239 * engine id it will get valid value. 
7240 */ 7241 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7242 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7243 7244 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7245 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7246 event_type, desc); 7247 7248 if (check_razwi) { 7249 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7250 &is_write); 7251 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7252 7253 if (is_read) 7254 razwi_flags |= HL_RAZWI_READ; 7255 if (is_write) 7256 razwi_flags |= HL_RAZWI_WRITE; 7257 7258 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7259 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7260 num_of_razwi_eng = 2; 7261 else 7262 num_of_razwi_eng = 1; 7263 } 7264 7265 if (razwi_flags) 7266 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7267 razwi_flags, event_mask); 7268 } 7269 } 7270 7271 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7272 struct cpucp_pkt_sync_err *sync_err) 7273 { 7274 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7275 7276 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7277 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7278 } 7279 7280 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7281 struct hl_eq_fw_alive *fw_alive) 7282 { 7283 dev_err(hdev->dev, 7284 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7285 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7286 le32_to_cpu(fw_alive->process_id), 7287 le32_to_cpu(fw_alive->thread_id), 7288 le64_to_cpu(fw_alive->uptime_seconds)); 7289 } 7290 7291 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7292 void *data) 7293 { 7294 char desc[64] = "", *type; 7295 struct eq_nic_sei_event *eq_nic_sei = data; 7296 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7297 7298 switch (eq_nic_sei->axi_error_cause) { 7299 case RXB: 7300 type = "RXB"; 7301 break; 7302 case RXE: 7303 type = "RXE"; 7304 break; 7305 case TXS: 7306 type = "TXS"; 7307 break; 7308 case TXE: 7309 type = "TXE"; 7310 break; 7311 case QPC_RESP: 7312 type = "QPC_RESP"; 7313 break; 7314 case NON_AXI_ERR: 7315 type = "NON_AXI_ERR"; 7316 break; 7317 case TMR: 7318 type = "TMR"; 7319 break; 7320 default: 7321 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7322 eq_nic_sei->axi_error_cause); 7323 type = "N/A"; 7324 break; 7325 } 7326 7327 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7328 eq_nic_sei->id); 7329 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7330 event_type, desc); 7331 } 7332 7333 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7334 { 7335 /* GAUDI doesn't support any reset except hard-reset */ 7336 return -EPERM; 7337 } 7338 7339 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7340 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7341 { 7342 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7343 int rc = 0; 7344 7345 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7346 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7347 if (!hbm_ecc_data) { 7348 dev_err(hdev->dev, "No FW ECC data"); 7349 return 0; 7350 } 7351 7352 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7353 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7354 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7355 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7356 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7357 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7358 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7359 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7360 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7361 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7362 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7363 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7364 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7365 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7366 7367 dev_err(hdev->dev, 7368 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7369 device, ch, wr_par, rd_par, ca_par, serr, derr); 7370 dev_err(hdev->dev, 7371 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7372 device, ch, hbm_ecc_data->first_addr, type, 7373 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7374 hbm_ecc_data->dec_cnt); 7375 return 0; 7376 } 7377 7378 if (hdev->asic_prop.fw_security_enabled) { 7379 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7380 return 0; 7381 } 7382 7383 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7384 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7385 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7386 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7387 if (val) { 7388 rc = -EIO; 7389 dev_err(hdev->dev, 7390 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7391 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7392 (val >> 2) & 0x1, (val >> 3) & 0x1, 7393 (val >> 4) & 0x1); 7394 7395 val2 = RREG32(base + ch * 0x1000 + 0x060); 7396 dev_err(hdev->dev, 7397 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7398 device, ch * 2, 7399 RREG32(base + ch * 0x1000 + 0x064), 7400 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7401 (val2 & 0xFF0000) >> 16, 7402 (val2 & 0xFF000000) >> 24); 7403 } 7404 7405 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7406 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7407 if (val) { 7408 rc = -EIO; 7409 dev_err(hdev->dev, 7410 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7411 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7412 (val >> 2) & 0x1, (val >> 3) & 0x1, 7413 (val >> 4) & 0x1); 7414 7415 val2 = RREG32(base + ch * 0x1000 + 0x070); 7416 dev_err(hdev->dev, 7417 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7418 device, ch * 2 + 1, 7419 RREG32(base + ch * 0x1000 + 0x074), 7420 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7421 (val2 & 0xFF0000) >> 16, 7422 (val2 & 0xFF000000) >> 24); 7423 } 7424 7425 /* Clear interrupts */ 7426 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7427 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7428 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7429 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7430 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7431 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7432 } 7433 7434 val = RREG32(base + 0x8F30); 7435 val2 = RREG32(base + 0x8F34); 7436 if (val | val2) { 7437 rc = -EIO; 7438 dev_err(hdev->dev, 7439 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7440 device, val, val2); 7441 } 7442 val = RREG32(base + 0x8F40); 7443 val2 = RREG32(base + 0x8F44); 7444 if (val | val2) { 7445 rc = -EIO; 7446 dev_err(hdev->dev, 7447 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7448 device, val, val2); 7449 } 7450 7451 return rc; 7452 } 
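
/*
 * Illustrative sketch, not part of the driver flow: how the per
 * pseudo-channel HBM interrupt word read in gaudi_hbm_read_interrupts()
 * above is folded and decoded. The register carries one status byte per
 * byte lane, so the two lanes are OR-ed together before the individual
 * cause bits are tested. The struct and helper names below are
 * hypothetical and exist only for illustration.
 */
struct gaudi_hbm_ch_irq_example {
	bool wr_par;
	bool rd_par;
	bool ca_par;
	bool serr;
	bool derr;
};

static void __maybe_unused
gaudi_hbm_decode_ch_irq_example(u32 raw, struct gaudi_hbm_ch_irq_example *out)
{
	/* fold the two byte lanes into a single cause bitmap */
	u32 val = (raw & 0xFF) | ((raw >> 8) & 0xFF);

	out->wr_par = val & 0x1;		/* write parity error */
	out->rd_par = (val >> 1) & 0x1;		/* read parity error */
	out->ca_par = (val >> 2) & 0x1;		/* command/address parity error */
	out->serr = (val >> 3) & 0x1;		/* single-bit ECC error */
	out->derr = (val >> 4) & 0x1;		/* double-bit ECC error */
}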
7453 7454 static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7455 { 7456 switch (hbm_event_type) { 7457 case GAUDI_EVENT_HBM0_SPI_0: 7458 case GAUDI_EVENT_HBM0_SPI_1: 7459 return 0; 7460 case GAUDI_EVENT_HBM1_SPI_0: 7461 case GAUDI_EVENT_HBM1_SPI_1: 7462 return 1; 7463 case GAUDI_EVENT_HBM2_SPI_0: 7464 case GAUDI_EVENT_HBM2_SPI_1: 7465 return 2; 7466 case GAUDI_EVENT_HBM3_SPI_0: 7467 case GAUDI_EVENT_HBM3_SPI_1: 7468 return 3; 7469 default: 7470 break; 7471 } 7472 7473 /* Should never happen */ 7474 return 0; 7475 } 7476 7477 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7478 char *interrupt_name) 7479 { 7480 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7481 bool soft_reset_required = false; 7482 7483 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7484 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7485 7486 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7487 if (tpc_interrupts_cause & BIT(i)) { 7488 dev_err_ratelimited(hdev->dev, 7489 "TPC%d_%s interrupt cause: %s\n", 7490 tpc_id, interrupt_name, 7491 gaudi_tpc_interrupts_cause[i]); 7492 /* If this is QM error, we need to soft-reset */ 7493 if (i == 15) 7494 soft_reset_required = true; 7495 } 7496 7497 /* Clear interrupts */ 7498 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7499 7500 return soft_reset_required; 7501 } 7502 7503 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7504 { 7505 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7506 } 7507 7508 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7509 { 7510 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7511 } 7512 7513 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7514 { 7515 ktime_t zero_time = ktime_set(0, 0); 7516 7517 mutex_lock(&hdev->clk_throttling.lock); 7518 7519 switch (event_type) { 7520 case GAUDI_EVENT_FIX_POWER_ENV_S: 7521 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7522 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7523 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7524 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7525 dev_info_ratelimited(hdev->dev, 7526 "Clock throttling due to power consumption\n"); 7527 break; 7528 7529 case GAUDI_EVENT_FIX_POWER_ENV_E: 7530 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7531 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7532 dev_info_ratelimited(hdev->dev, 7533 "Power envelop is safe, back to optimal clock\n"); 7534 break; 7535 7536 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7537 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7538 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7539 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7540 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7541 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7542 dev_info_ratelimited(hdev->dev, 7543 "Clock throttling due to overheating\n"); 7544 break; 7545 7546 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7547 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7548 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7549 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7550 dev_info_ratelimited(hdev->dev, 7551 "Thermal envelop is safe, back to optimal clock\n"); 7552 break; 7553 7554 default: 7555 
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7556 event_type); 7557 break; 7558 } 7559 7560 mutex_unlock(&hdev->clk_throttling.lock); 7561 } 7562 7563 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7564 { 7565 struct gaudi_device *gaudi = hdev->asic_specific; 7566 struct hl_info_fw_err_info fw_err_info; 7567 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7568 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7569 u32 fw_fatal_err_flag = 0, flags = 0; 7570 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7571 >> EQ_CTL_EVENT_TYPE_SHIFT); 7572 bool reset_required, reset_direct = false; 7573 u8 cause; 7574 int rc; 7575 7576 if (event_type >= GAUDI_EVENT_SIZE) { 7577 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7578 event_type, GAUDI_EVENT_SIZE - 1); 7579 return; 7580 } 7581 7582 gaudi->events_stat[event_type]++; 7583 gaudi->events_stat_aggregate[event_type]++; 7584 7585 switch (event_type) { 7586 case GAUDI_EVENT_PCIE_CORE_DERR: 7587 case GAUDI_EVENT_PCIE_IF_DERR: 7588 case GAUDI_EVENT_PCIE_PHY_DERR: 7589 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7590 case GAUDI_EVENT_MME0_ACC_DERR: 7591 case GAUDI_EVENT_MME0_SBAB_DERR: 7592 case GAUDI_EVENT_MME1_ACC_DERR: 7593 case GAUDI_EVENT_MME1_SBAB_DERR: 7594 case GAUDI_EVENT_MME2_ACC_DERR: 7595 case GAUDI_EVENT_MME2_SBAB_DERR: 7596 case GAUDI_EVENT_MME3_ACC_DERR: 7597 case GAUDI_EVENT_MME3_SBAB_DERR: 7598 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7599 fallthrough; 7600 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7601 case GAUDI_EVENT_PSOC_MEM_DERR: 7602 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7603 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7604 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7605 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7606 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7607 case GAUDI_EVENT_MMU_DERR: 7608 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7609 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7610 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7611 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7612 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7613 goto reset_device; 7614 7615 case GAUDI_EVENT_GIC500: 7616 case GAUDI_EVENT_AXI_ECC: 7617 case GAUDI_EVENT_L2_RAM_ECC: 7618 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7619 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7620 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7621 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7622 goto reset_device; 7623 7624 case GAUDI_EVENT_HBM0_SPI_0: 7625 case GAUDI_EVENT_HBM1_SPI_0: 7626 case GAUDI_EVENT_HBM2_SPI_0: 7627 case GAUDI_EVENT_HBM3_SPI_0: 7628 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7629 gaudi_hbm_read_interrupts(hdev, 7630 gaudi_hbm_event_to_dev(event_type), 7631 &eq_entry->hbm_ecc_data); 7632 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7633 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7634 goto reset_device; 7635 7636 case GAUDI_EVENT_HBM0_SPI_1: 7637 case GAUDI_EVENT_HBM1_SPI_1: 7638 case GAUDI_EVENT_HBM2_SPI_1: 7639 case GAUDI_EVENT_HBM3_SPI_1: 7640 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7641 gaudi_hbm_read_interrupts(hdev, 7642 gaudi_hbm_event_to_dev(event_type), 7643 &eq_entry->hbm_ecc_data); 7644 hl_fw_unmask_irq(hdev, event_type); 7645 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7646 break; 7647 7648 case GAUDI_EVENT_TPC0_DEC: 7649 case GAUDI_EVENT_TPC1_DEC: 7650 case GAUDI_EVENT_TPC2_DEC: 7651 case GAUDI_EVENT_TPC3_DEC: 7652 case GAUDI_EVENT_TPC4_DEC: 7653 case GAUDI_EVENT_TPC5_DEC: 7654 case GAUDI_EVENT_TPC6_DEC: 7655 case GAUDI_EVENT_TPC7_DEC: 7656 /* In TPC DEC event, notify on TPC assertion. While there isn't 7657 * a specific event for assertion yet, the FW generates TPC DEC event. 7658 * The SW upper layer will inspect an internal mapped area to indicate 7659 * if the event is a TPC Assertion or a "real" TPC DEC. 7660 */ 7661 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7662 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7663 reset_required = gaudi_tpc_read_interrupts(hdev, 7664 tpc_dec_event_to_tpc_id(event_type), 7665 "AXI_SLV_DEC_Error"); 7666 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7667 if (reset_required) { 7668 dev_err(hdev->dev, "reset required due to %s\n", 7669 gaudi_irq_map_table[event_type].name); 7670 7671 reset_direct = true; 7672 goto reset_device; 7673 } else { 7674 hl_fw_unmask_irq(hdev, event_type); 7675 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7676 } 7677 break; 7678 7679 case GAUDI_EVENT_TPC0_KRN_ERR: 7680 case GAUDI_EVENT_TPC1_KRN_ERR: 7681 case GAUDI_EVENT_TPC2_KRN_ERR: 7682 case GAUDI_EVENT_TPC3_KRN_ERR: 7683 case GAUDI_EVENT_TPC4_KRN_ERR: 7684 case GAUDI_EVENT_TPC5_KRN_ERR: 7685 case GAUDI_EVENT_TPC6_KRN_ERR: 7686 case GAUDI_EVENT_TPC7_KRN_ERR: 7687 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7688 reset_required = gaudi_tpc_read_interrupts(hdev, 7689 tpc_krn_event_to_tpc_id(event_type), 7690 "KRN_ERR"); 7691 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7692 if (reset_required) { 7693 dev_err(hdev->dev, "reset required due to %s\n", 7694 gaudi_irq_map_table[event_type].name); 7695 7696 reset_direct = true; 7697 goto reset_device; 7698 } else { 7699 hl_fw_unmask_irq(hdev, event_type); 7700 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7701 } 7702 break; 7703 7704 case GAUDI_EVENT_PCIE_CORE_SERR: 7705 case GAUDI_EVENT_PCIE_IF_SERR: 7706 case GAUDI_EVENT_PCIE_PHY_SERR: 7707 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7708 case GAUDI_EVENT_MME0_ACC_SERR: 7709 case GAUDI_EVENT_MME0_SBAB_SERR: 7710 case GAUDI_EVENT_MME1_ACC_SERR: 7711 case GAUDI_EVENT_MME1_SBAB_SERR: 7712 case GAUDI_EVENT_MME2_ACC_SERR: 7713 case GAUDI_EVENT_MME2_SBAB_SERR: 7714 case GAUDI_EVENT_MME3_ACC_SERR: 7715 case GAUDI_EVENT_MME3_SBAB_SERR: 7716 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7717 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7718 case GAUDI_EVENT_PSOC_MEM_SERR: 7719 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7720 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7721 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7722 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7723 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7724 fallthrough; 7725 case GAUDI_EVENT_MMU_SERR: 7726 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7727 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7728 hl_fw_unmask_irq(hdev, event_type); 7729 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7730 break; 7731 7732 case GAUDI_EVENT_PCIE_DEC: 7733 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7734 case GAUDI_EVENT_PSOC_AXI_DEC: 7735 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7736 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7737 hl_fw_unmask_irq(hdev, event_type); 7738 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7739 break; 7740 7741 case GAUDI_EVENT_MMU_PAGE_FAULT: 7742 case GAUDI_EVENT_MMU_WR_PERM: 7743 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7744 hl_fw_unmask_irq(hdev, event_type); 7745 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7746 break; 7747 7748 case GAUDI_EVENT_MME0_WBC_RSP: 7749 case GAUDI_EVENT_MME0_SBAB0_RSP: 7750 case GAUDI_EVENT_MME1_WBC_RSP: 7751 case GAUDI_EVENT_MME1_SBAB0_RSP: 7752 case GAUDI_EVENT_MME2_WBC_RSP: 7753 case GAUDI_EVENT_MME2_SBAB0_RSP: 7754 case GAUDI_EVENT_MME3_WBC_RSP: 7755 case GAUDI_EVENT_MME3_SBAB0_RSP: 7756 case GAUDI_EVENT_RAZWI_OR_ADC: 7757 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7758 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7759 fallthrough; 7760 case GAUDI_EVENT_NIC0_QM0: 7761 case GAUDI_EVENT_NIC0_QM1: 7762 case GAUDI_EVENT_NIC1_QM0: 7763 case GAUDI_EVENT_NIC1_QM1: 7764 case GAUDI_EVENT_NIC2_QM0: 7765 case GAUDI_EVENT_NIC2_QM1: 7766 case GAUDI_EVENT_NIC3_QM0: 7767 case GAUDI_EVENT_NIC3_QM1: 7768 case GAUDI_EVENT_NIC4_QM0: 7769 case GAUDI_EVENT_NIC4_QM1: 7770 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7771 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7772 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7773 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7774 hl_fw_unmask_irq(hdev, event_type); 7775 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7776 break; 7777 7778 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7779 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7780 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7781 goto reset_device; 7782 7783 case GAUDI_EVENT_TPC0_BMON_SPMU: 7784 case GAUDI_EVENT_TPC1_BMON_SPMU: 7785 case GAUDI_EVENT_TPC2_BMON_SPMU: 7786 case GAUDI_EVENT_TPC3_BMON_SPMU: 7787 case GAUDI_EVENT_TPC4_BMON_SPMU: 7788 case GAUDI_EVENT_TPC5_BMON_SPMU: 7789 case GAUDI_EVENT_TPC6_BMON_SPMU: 7790 case GAUDI_EVENT_TPC7_BMON_SPMU: 7791 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7792 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7793 hl_fw_unmask_irq(hdev, event_type); 7794 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7795 break; 7796 7797 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7798 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7799 hl_fw_unmask_irq(hdev, event_type); 7800 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7801 break; 7802 7803 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7804 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7805 gaudi_print_sm_sei_info(hdev, event_type, 7806 &eq_entry->sm_sei_data); 7807 rc = hl_state_dump(hdev); 7808 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7809 if (rc) 7810 dev_err(hdev->dev, 7811 "Error during system state dump %d\n", rc); 7812 hl_fw_unmask_irq(hdev, event_type); 7813 break; 7814 7815 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7816 break; 7817 7818 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7819 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7820 hl_fw_unmask_irq(hdev, event_type); 7821 break; 7822 7823 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7824 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7825 dev_err(hdev->dev, 7826 "Received high temp H/W interrupt %d (cause %d)\n", 7827 event_type, cause); 7828 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7829 break; 7830 7831 case GAUDI_EVENT_DEV_RESET_REQ: 7832 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7833 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7834 goto reset_device; 7835 7836 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7837 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7838 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7839 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7840 goto reset_device; 7841 7842 case GAUDI_EVENT_FW_ALIVE_S: 7843 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7844 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7845 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7846 fw_err_info.event_id = event_type; 7847 fw_err_info.event_mask = &event_mask; 7848 hl_handle_fw_err(hdev, &fw_err_info); 7849 goto reset_device; 7850 7851 default: 7852 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7853 event_type); 7854 break; 7855 } 7856 7857 if (event_mask) 7858 hl_notifier_event_send_all(hdev, event_mask); 7859 7860 return; 7861 7862 reset_device: 7863 reset_required = true; 7864 7865 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7866 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7867 7868 /* notify on device unavailable while the reset triggered by fw */ 7869 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7870 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7871 } else if (hdev->hard_reset_on_fw_events) { 7872 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7873 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7874 } else { 7875 reset_required = false; 7876 } 7877 7878 if (reset_required) { 7879 /* escalate general hw errors to critical/fatal error */ 7880 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7881 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7882 7883 hl_device_cond_reset(hdev, flags, event_mask); 7884 } else { 7885 hl_fw_unmask_irq(hdev, event_type); 7886 /* Notification on occurred event needs to be sent although reset is not executed */ 7887 if (event_mask) 7888 hl_notifier_event_send_all(hdev, event_mask); 7889 } 7890 } 7891 7892 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7893 { 7894 struct gaudi_device *gaudi = hdev->asic_specific; 7895 7896 if (aggregate) { 7897 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7898 
return gaudi->events_stat_aggregate; 7899 } 7900 7901 *size = (u32) sizeof(gaudi->events_stat); 7902 return gaudi->events_stat; 7903 } 7904 7905 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7906 { 7907 struct gaudi_device *gaudi = hdev->asic_specific; 7908 u32 status, timeout_usec; 7909 int rc; 7910 7911 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7912 hdev->reset_info.hard_reset_pending) 7913 return 0; 7914 7915 if (hdev->pldm) 7916 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7917 else 7918 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7919 7920 /* L0 & L1 invalidation */ 7921 WREG32(mmSTLB_INV_PS, 3); 7922 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7923 WREG32(mmSTLB_INV_PS, 2); 7924 7925 rc = hl_poll_timeout( 7926 hdev, 7927 mmSTLB_INV_PS, 7928 status, 7929 !status, 7930 1000, 7931 timeout_usec); 7932 7933 WREG32(mmSTLB_INV_SET, 0); 7934 7935 return rc; 7936 } 7937 7938 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 7939 bool is_hard, u32 flags, 7940 u32 asid, u64 va, u64 size) 7941 { 7942 /* Treat as invalidate all because there is no range invalidation 7943 * in Gaudi 7944 */ 7945 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 7946 } 7947 7948 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 7949 { 7950 u32 status, timeout_usec; 7951 int rc; 7952 7953 if (hdev->pldm) 7954 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7955 else 7956 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7957 7958 WREG32(MMU_ASID, asid); 7959 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 7960 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 7961 WREG32(MMU_BUSY, 0x80000000); 7962 7963 rc = hl_poll_timeout( 7964 hdev, 7965 MMU_BUSY, 7966 status, 7967 !(status & 0x80000000), 7968 1000, 7969 timeout_usec); 7970 7971 if (rc) { 7972 dev_err(hdev->dev, 7973 "Timeout during MMU hop0 config of asid %d\n", asid); 7974 return rc; 7975 } 7976 7977 return 0; 7978 } 7979 7980 static int gaudi_send_heartbeat(struct hl_device *hdev) 7981 { 7982 struct gaudi_device *gaudi = hdev->asic_specific; 7983 7984 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7985 return 0; 7986 7987 return hl_fw_send_heartbeat(hdev); 7988 } 7989 7990 static int gaudi_cpucp_info_get(struct hl_device *hdev) 7991 { 7992 struct gaudi_device *gaudi = hdev->asic_specific; 7993 struct asic_fixed_properties *prop = &hdev->asic_prop; 7994 int rc; 7995 7996 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7997 return 0; 7998 7999 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8000 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8001 mmCPU_BOOT_ERR1); 8002 if (rc) 8003 return rc; 8004 8005 if (!strlen(prop->cpucp_info.card_name)) 8006 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8007 CARD_NAME_MAX_LEN); 8008 8009 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8010 8011 set_default_power_values(hdev); 8012 8013 return 0; 8014 } 8015 8016 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8017 struct engines_data *e) 8018 { 8019 struct gaudi_device *gaudi = hdev->asic_specific; 8020 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8021 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8022 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8023 unsigned long *mask = (unsigned long *)mask_arr; 8024 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8025 bool is_idle = true, is_eng_idle, is_slave; 8026 u64 offset; 8027 
int i, dma_id, port; 8028 8029 if (e) 8030 hl_engine_data_sprintf(e, 8031 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8032 "--- ------- ------------ ---------- -------------\n"); 8033 8034 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8035 dma_id = gaudi_dma_assignment[i]; 8036 offset = dma_id * DMA_QMAN_OFFSET; 8037 8038 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8039 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8040 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8041 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8042 IS_DMA_IDLE(dma_core_sts0); 8043 is_idle &= is_eng_idle; 8044 8045 if (mask && !is_eng_idle) 8046 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8047 if (e) 8048 hl_engine_data_sprintf(e, fmt, dma_id, 8049 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8050 qm_cgm_sts, dma_core_sts0); 8051 } 8052 8053 if (e) 8054 hl_engine_data_sprintf(e, 8055 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8056 "--- ------- ------------ ---------- ----------\n"); 8057 8058 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8059 offset = i * TPC_QMAN_OFFSET; 8060 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8061 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8062 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8063 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8064 IS_TPC_IDLE(tpc_cfg_sts); 8065 is_idle &= is_eng_idle; 8066 8067 if (mask && !is_eng_idle) 8068 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8069 if (e) 8070 hl_engine_data_sprintf(e, fmt, i, 8071 is_eng_idle ? "Y" : "N", 8072 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8073 } 8074 8075 if (e) 8076 hl_engine_data_sprintf(e, 8077 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8078 "--- ------- ------------ ---------- -----------\n"); 8079 8080 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8081 offset = i * MME_QMAN_OFFSET; 8082 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8083 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8084 8085 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8086 is_slave = i % 2; 8087 if (!is_slave) { 8088 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8089 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8090 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8091 } 8092 8093 is_idle &= is_eng_idle; 8094 8095 if (mask && !is_eng_idle) 8096 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8097 if (e) { 8098 if (!is_slave) 8099 hl_engine_data_sprintf(e, fmt, i, 8100 is_eng_idle ? "Y" : "N", 8101 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8102 else 8103 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8104 is_eng_idle ? "Y" : "N", "-", 8105 "-", mme_arch_sts); 8106 } 8107 } 8108 8109 if (e) 8110 hl_engine_data_sprintf(e, 8111 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8112 "--- ------- ------------ ----------\n"); 8113 8114 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8115 offset = i * NIC_MACRO_QMAN_OFFSET; 8116 port = 2 * i; 8117 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8118 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8119 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8120 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8121 is_idle &= is_eng_idle; 8122 8123 if (mask && !is_eng_idle) 8124 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8125 if (e) 8126 hl_engine_data_sprintf(e, nic_fmt, port, 8127 is_eng_idle ? 
"Y" : "N", 8128 qm_glbl_sts0, qm_cgm_sts); 8129 } 8130 8131 port = 2 * i + 1; 8132 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8133 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8134 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8135 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8136 is_idle &= is_eng_idle; 8137 8138 if (mask && !is_eng_idle) 8139 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8140 if (e) 8141 hl_engine_data_sprintf(e, nic_fmt, port, 8142 is_eng_idle ? "Y" : "N", 8143 qm_glbl_sts0, qm_cgm_sts); 8144 } 8145 } 8146 8147 if (e) 8148 hl_engine_data_sprintf(e, "\n"); 8149 8150 return is_idle; 8151 } 8152 8153 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8154 __acquires(&gaudi->hw_queues_lock) 8155 { 8156 struct gaudi_device *gaudi = hdev->asic_specific; 8157 8158 spin_lock(&gaudi->hw_queues_lock); 8159 } 8160 8161 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8162 __releases(&gaudi->hw_queues_lock) 8163 { 8164 struct gaudi_device *gaudi = hdev->asic_specific; 8165 8166 spin_unlock(&gaudi->hw_queues_lock); 8167 } 8168 8169 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8170 { 8171 return hdev->pdev->device; 8172 } 8173 8174 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8175 size_t max_size) 8176 { 8177 struct gaudi_device *gaudi = hdev->asic_specific; 8178 8179 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8180 return 0; 8181 8182 return hl_fw_get_eeprom_data(hdev, data, max_size); 8183 } 8184 8185 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8186 { 8187 struct gaudi_device *gaudi = hdev->asic_specific; 8188 8189 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8190 return 0; 8191 8192 return hl_fw_get_monitor_dump(hdev, data); 8193 } 8194 8195 /* 8196 * this function should be used only during initialization and/or after reset, 8197 * when there are no active users. 
8198 */ 8199 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8200 { 8201 u64 kernel_timeout; 8202 u32 status, offset; 8203 int rc; 8204 8205 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8206 8207 if (hdev->pldm) 8208 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8209 else 8210 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8211 8212 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8213 lower_32_bits(tpc_kernel)); 8214 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8215 upper_32_bits(tpc_kernel)); 8216 8217 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8218 lower_32_bits(tpc_kernel)); 8219 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8220 upper_32_bits(tpc_kernel)); 8221 /* set a valid LUT pointer, content is of no significance */ 8222 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8223 lower_32_bits(tpc_kernel)); 8224 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8225 upper_32_bits(tpc_kernel)); 8226 8227 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8228 lower_32_bits(CFG_BASE + 8229 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8230 8231 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8232 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8233 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8234 /* wait a bit for the engine to start executing */ 8235 usleep_range(1000, 1500); 8236 8237 /* wait until engine has finished executing */ 8238 rc = hl_poll_timeout( 8239 hdev, 8240 mmTPC0_CFG_STATUS + offset, 8241 status, 8242 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8243 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8244 1000, 8245 kernel_timeout); 8246 8247 if (rc) { 8248 dev_err(hdev->dev, 8249 "Timeout while waiting for TPC%d icache prefetch\n", 8250 tpc_id); 8251 return -EIO; 8252 } 8253 8254 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8255 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8256 8257 /* wait a bit for the engine to start executing */ 8258 usleep_range(1000, 1500); 8259 8260 /* wait until engine has finished executing */ 8261 rc = hl_poll_timeout( 8262 hdev, 8263 mmTPC0_CFG_STATUS + offset, 8264 status, 8265 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8266 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8267 1000, 8268 kernel_timeout); 8269 8270 if (rc) { 8271 dev_err(hdev->dev, 8272 "Timeout while waiting for TPC%d vector pipe\n", 8273 tpc_id); 8274 return -EIO; 8275 } 8276 8277 rc = hl_poll_timeout( 8278 hdev, 8279 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8280 status, 8281 (status == 0), 8282 1000, 8283 kernel_timeout); 8284 8285 if (rc) { 8286 dev_err(hdev->dev, 8287 "Timeout while waiting for TPC%d kernel to execute\n", 8288 tpc_id); 8289 return -EIO; 8290 } 8291 8292 return 0; 8293 } 8294 8295 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8296 struct hl_ctx *ctx) 8297 { 8298 struct gaudi_device *gaudi = hdev->asic_specific; 8299 int min_alloc_order, rc, collective_cb_size; 8300 8301 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8302 return 0; 8303 8304 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8305 HOST_SPACE_INTERNAL_CB_SZ, 8306 &hdev->internal_cb_pool_dma_addr, 8307 GFP_KERNEL | __GFP_ZERO); 8308 8309 if (!hdev->internal_cb_pool_virt_addr) 8310 return -ENOMEM; 8311 8312 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8313 sizeof(struct packet_fence); 8314 min_alloc_order = ilog2(collective_cb_size); 8315 8316 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8317 if (!hdev->internal_cb_pool) { 8318 dev_err(hdev->dev, 8319 "Failed to 
create internal CB pool\n"); 8320 rc = -ENOMEM; 8321 goto free_internal_cb_pool; 8322 } 8323 8324 rc = gen_pool_add(hdev->internal_cb_pool, 8325 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8326 HOST_SPACE_INTERNAL_CB_SZ, -1); 8327 if (rc) { 8328 dev_err(hdev->dev, 8329 "Failed to add memory to internal CB pool\n"); 8330 rc = -EFAULT; 8331 goto destroy_internal_cb_pool; 8332 } 8333 8334 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8335 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8336 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8337 8338 if (!hdev->internal_cb_va_base) { 8339 rc = -ENOMEM; 8340 goto destroy_internal_cb_pool; 8341 } 8342 8343 mutex_lock(&hdev->mmu_lock); 8344 8345 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8346 hdev->internal_cb_pool_dma_addr, 8347 HOST_SPACE_INTERNAL_CB_SZ); 8348 if (rc) 8349 goto unreserve_internal_cb_pool; 8350 8351 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8352 if (rc) 8353 goto unmap_internal_cb_pool; 8354 8355 mutex_unlock(&hdev->mmu_lock); 8356 8357 return 0; 8358 8359 unmap_internal_cb_pool: 8360 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8361 HOST_SPACE_INTERNAL_CB_SZ); 8362 unreserve_internal_cb_pool: 8363 mutex_unlock(&hdev->mmu_lock); 8364 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8365 HOST_SPACE_INTERNAL_CB_SZ); 8366 destroy_internal_cb_pool: 8367 gen_pool_destroy(hdev->internal_cb_pool); 8368 free_internal_cb_pool: 8369 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8370 hdev->internal_cb_pool_dma_addr); 8371 8372 return rc; 8373 } 8374 8375 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8376 struct hl_ctx *ctx) 8377 { 8378 struct gaudi_device *gaudi = hdev->asic_specific; 8379 8380 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8381 return; 8382 8383 mutex_lock(&hdev->mmu_lock); 8384 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8385 HOST_SPACE_INTERNAL_CB_SZ); 8386 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8387 HOST_SPACE_INTERNAL_CB_SZ); 8388 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8389 mutex_unlock(&hdev->mmu_lock); 8390 8391 gen_pool_destroy(hdev->internal_cb_pool); 8392 8393 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8394 hdev->internal_cb_pool_dma_addr); 8395 } 8396 8397 static int gaudi_ctx_init(struct hl_ctx *ctx) 8398 { 8399 int rc; 8400 8401 if (ctx->asid == HL_KERNEL_ASID_ID) 8402 return 0; 8403 8404 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8405 if (rc) 8406 return rc; 8407 8408 rc = gaudi_restore_user_registers(ctx->hdev); 8409 if (rc) 8410 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8411 8412 return rc; 8413 } 8414 8415 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8416 { 8417 if (ctx->asid == HL_KERNEL_ASID_ID) 8418 return; 8419 8420 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8421 } 8422 8423 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8424 { 8425 return 0; 8426 } 8427 8428 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8429 { 8430 return gaudi_cq_assignment[cq_idx]; 8431 } 8432 8433 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8434 { 8435 return sizeof(struct packet_msg_short) + 8436 sizeof(struct packet_msg_prot) * 2; 8437 } 8438 8439 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8440 { 8441 return sizeof(struct packet_msg_short) * 4 + 8442 sizeof(struct packet_fence) + 8443 sizeof(struct packet_msg_prot) * 2; 8444 } 8445 8446 
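
/*
 * Illustrative note, not part of the driver flow: the wait CB size
 * returned above corresponds to the packets emitted by gaudi_gen_wait_cb()
 * further down in this file - three MSG_SHORT packets that program the
 * monitor payload address (low/high) and payload data, one MSG_SHORT that
 * arms the monitor on the requested SOB group/value and one FENCE packet
 * that blocks until the monitor payload arrives. The two MSG_PROT entries
 * are assumed to be reserved for completion packets appended elsewhere.
 * The helper name below is hypothetical and exists only for illustration.
 */
static u32 __maybe_unused gaudi_wait_cb_size_breakdown_example(void)
{
	/* monitor setup: payload address low/high + payload data */
	u32 size = sizeof(struct packet_msg_short) * 3;

	/* arm the monitor on the SOB group/value to wait for */
	size += sizeof(struct packet_msg_short);

	/* fence that blocks the stream until the monitor fires */
	size += sizeof(struct packet_fence);

	/* completion MSG_PROT packets accounted for in the CB size */
	size += sizeof(struct packet_msg_prot) * 2;

	return size;	/* equals gaudi_get_wait_cb_size() above */
}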
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8447 { 8448 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8449 } 8450 8451 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8452 u32 size, bool eb) 8453 { 8454 struct hl_cb *cb = (struct hl_cb *) data; 8455 struct packet_msg_short *pkt; 8456 u32 value, ctl, pkt_size = sizeof(*pkt); 8457 8458 pkt = cb->kernel_address + size; 8459 memset(pkt, 0, pkt_size); 8460 8461 /* Inc by 1, Mode ADD */ 8462 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8463 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8464 8465 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8466 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8467 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8468 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8469 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8470 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8471 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8472 8473 pkt->value = cpu_to_le32(value); 8474 pkt->ctl = cpu_to_le32(ctl); 8475 8476 return size + pkt_size; 8477 } 8478 8479 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8480 u16 addr) 8481 { 8482 u32 ctl, pkt_size = sizeof(*pkt); 8483 8484 memset(pkt, 0, pkt_size); 8485 8486 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8487 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8488 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8489 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8490 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8491 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8492 8493 pkt->value = cpu_to_le32(value); 8494 pkt->ctl = cpu_to_le32(ctl); 8495 8496 return pkt_size; 8497 } 8498 8499 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8500 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8501 u16 sob_val, u16 mon_id) 8502 { 8503 u64 monitor_base; 8504 u32 ctl, value, pkt_size = sizeof(*pkt); 8505 u16 msg_addr_offset; 8506 u8 mask; 8507 8508 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8509 dev_err(hdev->dev, 8510 "sob_base %u (mask %#x) is not valid\n", 8511 sob_base, sob_mask); 8512 return 0; 8513 } 8514 8515 /* 8516 * monitor_base should be the content of the base0 address registers, 8517 * so it will be added to the msg short offsets 8518 */ 8519 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8520 8521 msg_addr_offset = 8522 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8523 monitor_base; 8524 8525 memset(pkt, 0, pkt_size); 8526 8527 /* Monitor config packet: bind the monitor to a sync object */ 8528 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8529 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8530 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8531 0); /* GREATER OR EQUAL*/ 8532 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8533 8534 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8535 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8536 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8537 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8538 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8539 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8540 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8541 8542 pkt->value = 
cpu_to_le32(value); 8543 pkt->ctl = cpu_to_le32(ctl); 8544 8545 return pkt_size; 8546 } 8547 8548 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8549 { 8550 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8551 8552 memset(pkt, 0, pkt_size); 8553 8554 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8555 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8556 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8557 8558 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8559 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8560 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8561 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8562 8563 pkt->cfg = cpu_to_le32(cfg); 8564 pkt->ctl = cpu_to_le32(ctl); 8565 8566 return pkt_size; 8567 } 8568 8569 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8570 { 8571 u32 offset, nic_index; 8572 8573 switch (queue_id) { 8574 case GAUDI_QUEUE_ID_DMA_0_0: 8575 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8576 break; 8577 case GAUDI_QUEUE_ID_DMA_0_1: 8578 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8579 break; 8580 case GAUDI_QUEUE_ID_DMA_0_2: 8581 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8582 break; 8583 case GAUDI_QUEUE_ID_DMA_0_3: 8584 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8585 break; 8586 case GAUDI_QUEUE_ID_DMA_1_0: 8587 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8588 break; 8589 case GAUDI_QUEUE_ID_DMA_1_1: 8590 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8591 break; 8592 case GAUDI_QUEUE_ID_DMA_1_2: 8593 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8594 break; 8595 case GAUDI_QUEUE_ID_DMA_1_3: 8596 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8597 break; 8598 case GAUDI_QUEUE_ID_DMA_5_0: 8599 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8600 break; 8601 case GAUDI_QUEUE_ID_DMA_5_1: 8602 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8603 break; 8604 case GAUDI_QUEUE_ID_DMA_5_2: 8605 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8606 break; 8607 case GAUDI_QUEUE_ID_DMA_5_3: 8608 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8609 break; 8610 case GAUDI_QUEUE_ID_TPC_7_0: 8611 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8612 break; 8613 case GAUDI_QUEUE_ID_TPC_7_1: 8614 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8615 break; 8616 case GAUDI_QUEUE_ID_TPC_7_2: 8617 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8618 break; 8619 case GAUDI_QUEUE_ID_TPC_7_3: 8620 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8621 break; 8622 case GAUDI_QUEUE_ID_NIC_0_0: 8623 case GAUDI_QUEUE_ID_NIC_1_0: 8624 case GAUDI_QUEUE_ID_NIC_2_0: 8625 case GAUDI_QUEUE_ID_NIC_3_0: 8626 case GAUDI_QUEUE_ID_NIC_4_0: 8627 case GAUDI_QUEUE_ID_NIC_5_0: 8628 case GAUDI_QUEUE_ID_NIC_6_0: 8629 case GAUDI_QUEUE_ID_NIC_7_0: 8630 case GAUDI_QUEUE_ID_NIC_8_0: 8631 case GAUDI_QUEUE_ID_NIC_9_0: 8632 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8633 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8634 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8635 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8636 break; 8637 case GAUDI_QUEUE_ID_NIC_0_1: 8638 case GAUDI_QUEUE_ID_NIC_1_1: 8639 case GAUDI_QUEUE_ID_NIC_2_1: 8640 case GAUDI_QUEUE_ID_NIC_3_1: 8641 case GAUDI_QUEUE_ID_NIC_4_1: 8642 case GAUDI_QUEUE_ID_NIC_5_1: 8643 case GAUDI_QUEUE_ID_NIC_6_1: 8644 case GAUDI_QUEUE_ID_NIC_7_1: 8645 case GAUDI_QUEUE_ID_NIC_8_1: 8646 case GAUDI_QUEUE_ID_NIC_9_1: 8647 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8648 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8649 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8650 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8651 break; 8652 case GAUDI_QUEUE_ID_NIC_0_2: 8653 case GAUDI_QUEUE_ID_NIC_1_2: 8654 case GAUDI_QUEUE_ID_NIC_2_2: 8655 case 
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}

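/*
 * Assemble a wait CB at offset prop->size of the caller's CB: three monitor
 * payload setup packets, one packet that arms the monitor on the requested
 * SOB group/mask/value, and a closing FENCE packet on fence counter 2.
 */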
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
						hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

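/*
 * State dump support: build a hash map from sync object id to the engine
 * that uses it, by reading the SO address each TPC/MME/DMA engine was
 * configured with.
 */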
static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}

static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

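/*
 * Format a single valid monitor for the state dump: its id and optional
 * name, the SOB group, mask and target value it is armed on, the
 * address/data it will write when triggered, its pending status and the
 * resolved list of monitored sync objects.
 */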
static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}

static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

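/*
 * Register the Gaudi state dump specs: id-to-name hash tables for sync
 * objects and monitors, register offset properties, sync manager names and
 * the callbacks defined above.
 */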
static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}