// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: the driver needs to parse the CB, but WREG must be
 *                      allowed because of TDMA (tensor DMA). Hence, WREG is
 *                      never secured.
 *
 * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
 * channel 0 to secured, executes the DMA and changes it back to non-secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256
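
/*
 * A note on two of the values above (derived from the numbers themselves,
 * not from the H/W spec): GAUDI_ARB_WDT_TIMEOUT is 0xEE6b27FF, i.e.
 * 3,999,999,999 cycles, which matches the "8 seconds" comment assuming the
 * QMAN arbiter watchdog counts a clock of roughly 500 MHz:
 *
 *	3,999,999,999 cycles / 500,000,000 Hz ~= 8 s
 *
 * BIN_REG_STRING_SIZE is the buffer size needed to print a 32-bit register
 * in binary: a "0b" prefix (2 chars) + 32 digits + a terminating NUL = 35.
 */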

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32] = sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
	[PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
	[PACKET_REPEAT] = sizeof(struct packet_repeat),
	[PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
	[PACKET_FENCE] = sizeof(struct packet_fence),
	[PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
	[PACKET_NOP] = sizeof(struct packet_nop),
	[PACKET_STOP] = sizeof(struct packet_stop),
	[PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
	[PACKET_WAIT] = sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};

static const char * const
	gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
	"PQ AXI HBW error",
	"CQ AXI HBW error",
	"CP AXI HBW error",
	"CP error due to undefined OPCODE",
	"CP encountered STOP OPCODE",
	"CP AXI LBW error",
	"CP WRREG32 or WRBULK returned error",
	"N/A",
	"FENCE 0 inc over max value and clipped",
	"FENCE 1 inc over max value and clipped",
	"FENCE 2 inc over max value and clipped",
"FENCE 3 inc over max value and clipped", 221 "FENCE 0 dec under min value and clipped", 222 "FENCE 1 dec under min value and clipped", 223 "FENCE 2 dec under min value and clipped", 224 "FENCE 3 dec under min value and clipped" 225 }; 226 227 static const char * const 228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 229 "Choice push while full error", 230 "Choice Q watchdog error", 231 "MSG AXI LBW returned with error" 232 }; 233 234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 243 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 296 QUEUE_TYPE_INT, /* 
GAUDI_QUEUE_ID_TPC_5_0 */ 297 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */ 298 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */ 299 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */ 300 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */ 301 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */ 302 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */ 303 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */ 304 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */ 305 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */ 306 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */ 307 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */ 308 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_0 */ 309 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_1 */ 310 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_2 */ 311 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_0_3 */ 312 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_0 */ 313 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_1 */ 314 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_2 */ 315 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_1_3 */ 316 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_0 */ 317 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_1 */ 318 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_2 */ 319 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_2_3 */ 320 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_0 */ 321 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_1 */ 322 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_2 */ 323 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_3_3 */ 324 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_0 */ 325 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_1 */ 326 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_2 */ 327 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_4_3 */ 328 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_0 */ 329 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_1 */ 330 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_2 */ 331 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_5_3 */ 332 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_0 */ 333 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_1 */ 334 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_2 */ 335 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_6_3 */ 336 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_0 */ 337 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_1 */ 338 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_2 */ 339 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_7_3 */ 340 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_0 */ 341 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_1 */ 342 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_2 */ 343 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_8_3 */ 344 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_0 */ 345 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_1 */ 346 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_2 */ 347 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */ 348 }; 349 350 static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = { 351 { .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" }, 352 { .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" }, 353 { .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" }, 354 { .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" }, 355 { .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" }, 356 { .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" }, 357 { .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" }, 358 { .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" }, 359 { .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" }, 360 { .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" }, 361 { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" }, 362 { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" }, 363 { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" }, 364 { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" }, 365 { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" }, 366 { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" }, 367 { .id = 16, .name = 
"SYNC_OBJ_ENGINE_SEM_TPC_6" }, 368 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, 369 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, 370 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, 371 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 372 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 373 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 374 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 375 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 376 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 377 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 378 }; 379 380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 381 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 382 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 383 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 384 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 385 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 386 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 387 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 388 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 389 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 390 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 391 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 392 }; 393 394 static s64 gaudi_state_dump_specs_props[] = { 395 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 396 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 397 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 398 [SP_MON_OBJ_WR_ADDR_LOW] = 399 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 400 [SP_MON_OBJ_WR_ADDR_HIGH] = 401 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 402 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 403 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 404 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 405 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 406 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 407 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 408 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 409 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 410 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 411 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 412 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 413 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 414 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 415 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 416 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 417 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 418 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 419 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 420 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 421 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 422 [SP_FENCE0_CNT_OFFSET] = 423 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 424 [SP_FENCE0_RDATA_OFFSET] = 425 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 426 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 427 [SP_NUM_CORES] = 1, 428 }; 429 430 static const int gaudi_queue_id_to_engine_id[] = { 431 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, 432 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, 433 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, 434 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, 435 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, 436 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, 437 
[GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3] = GAUDI_ENGINE_ID_DMA_5, 438 [GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3] = GAUDI_ENGINE_ID_DMA_6, 439 [GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3] = GAUDI_ENGINE_ID_DMA_7, 440 [GAUDI_QUEUE_ID_MME_0_0...GAUDI_QUEUE_ID_MME_0_3] = GAUDI_ENGINE_ID_MME_0, 441 [GAUDI_QUEUE_ID_MME_1_0...GAUDI_QUEUE_ID_MME_1_3] = GAUDI_ENGINE_ID_MME_2, 442 [GAUDI_QUEUE_ID_TPC_0_0...GAUDI_QUEUE_ID_TPC_0_3] = GAUDI_ENGINE_ID_TPC_0, 443 [GAUDI_QUEUE_ID_TPC_1_0...GAUDI_QUEUE_ID_TPC_1_3] = GAUDI_ENGINE_ID_TPC_1, 444 [GAUDI_QUEUE_ID_TPC_2_0...GAUDI_QUEUE_ID_TPC_2_3] = GAUDI_ENGINE_ID_TPC_2, 445 [GAUDI_QUEUE_ID_TPC_3_0...GAUDI_QUEUE_ID_TPC_3_3] = GAUDI_ENGINE_ID_TPC_3, 446 [GAUDI_QUEUE_ID_TPC_4_0...GAUDI_QUEUE_ID_TPC_4_3] = GAUDI_ENGINE_ID_TPC_4, 447 [GAUDI_QUEUE_ID_TPC_5_0...GAUDI_QUEUE_ID_TPC_5_3] = GAUDI_ENGINE_ID_TPC_5, 448 [GAUDI_QUEUE_ID_TPC_6_0...GAUDI_QUEUE_ID_TPC_6_3] = GAUDI_ENGINE_ID_TPC_6, 449 [GAUDI_QUEUE_ID_TPC_7_0...GAUDI_QUEUE_ID_TPC_7_3] = GAUDI_ENGINE_ID_TPC_7, 450 [GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3] = GAUDI_ENGINE_ID_NIC_0, 451 [GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3] = GAUDI_ENGINE_ID_NIC_1, 452 [GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3] = GAUDI_ENGINE_ID_NIC_2, 453 [GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3] = GAUDI_ENGINE_ID_NIC_3, 454 [GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3] = GAUDI_ENGINE_ID_NIC_4, 455 [GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3] = GAUDI_ENGINE_ID_NIC_5, 456 [GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3] = GAUDI_ENGINE_ID_NIC_6, 457 [GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3] = GAUDI_ENGINE_ID_NIC_7, 458 [GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3] = GAUDI_ENGINE_ID_NIC_8, 459 [GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3] = GAUDI_ENGINE_ID_NIC_9, 460 }; 461 462 /* The order here is opposite to the order of the indexing in the h/w. 463 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc. 
464 */ 465 static const char * const gaudi_sync_manager_names[] = { 466 "SYNC_MGR_E_N", 467 "SYNC_MGR_W_N", 468 "SYNC_MGR_E_S", 469 "SYNC_MGR_W_S", 470 NULL 471 }; 472 473 struct ecc_info_extract_params { 474 u64 block_address; 475 u32 num_memories; 476 bool derr; 477 }; 478 479 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, 480 u64 phys_addr); 481 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 482 struct hl_cs_job *job); 483 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 484 u32 size, u64 val); 485 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 486 u32 num_regs, u32 val); 487 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, 488 u32 tpc_id); 489 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev); 490 static int gaudi_cpucp_info_get(struct hl_device *hdev); 491 static void gaudi_disable_clock_gating(struct hl_device *hdev); 492 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid); 493 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 494 u32 size, bool eb); 495 static u32 gaudi_gen_wait_cb(struct hl_device *hdev, 496 struct hl_gen_wait_properties *prop); 497 static inline enum hl_collective_mode 498 get_collective_mode(struct hl_device *hdev, u32 queue_id) 499 { 500 if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT) 501 return HL_COLLECTIVE_MASTER; 502 503 if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 && 504 queue_id <= GAUDI_QUEUE_ID_DMA_5_3) 505 return HL_COLLECTIVE_SLAVE; 506 507 if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 && 508 queue_id <= GAUDI_QUEUE_ID_TPC_7_3) 509 return HL_COLLECTIVE_SLAVE; 510 511 if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 && 512 queue_id <= GAUDI_QUEUE_ID_NIC_9_3) 513 return HL_COLLECTIVE_SLAVE; 514 515 return HL_COLLECTIVE_NOT_SUPPORTED; 516 } 517 518 static inline void set_default_power_values(struct hl_device *hdev) 519 { 520 struct asic_fixed_properties *prop = &hdev->asic_prop; 521 522 if (hdev->card_type == cpucp_card_type_pmc) { 523 prop->max_power_default = MAX_POWER_DEFAULT_PMC; 524 525 if (prop->fw_security_enabled) 526 prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC; 527 else 528 prop->dc_power_default = DC_POWER_DEFAULT_PMC; 529 } else { 530 prop->max_power_default = MAX_POWER_DEFAULT_PCI; 531 prop->dc_power_default = DC_POWER_DEFAULT_PCI; 532 } 533 } 534 535 static int gaudi_set_fixed_properties(struct hl_device *hdev) 536 { 537 struct asic_fixed_properties *prop = &hdev->asic_prop; 538 u32 num_sync_stream_queues = 0; 539 int i; 540 541 prop->max_queues = GAUDI_QUEUE_ID_SIZE; 542 prop->hw_queues_props = kcalloc(prop->max_queues, 543 sizeof(struct hw_queue_properties), 544 GFP_KERNEL); 545 546 if (!prop->hw_queues_props) 547 return -ENOMEM; 548 549 for (i = 0 ; i < prop->max_queues ; i++) { 550 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) { 551 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT; 552 prop->hw_queues_props[i].driver_only = 0; 553 prop->hw_queues_props[i].supports_sync_stream = 1; 554 prop->hw_queues_props[i].cb_alloc_flags = 555 CB_ALLOC_KERNEL; 556 num_sync_stream_queues++; 557 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) { 558 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU; 559 prop->hw_queues_props[i].driver_only = 1; 560 prop->hw_queues_props[i].supports_sync_stream = 0; 561 prop->hw_queues_props[i].cb_alloc_flags = 562 CB_ALLOC_KERNEL; 563 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) { 564 prop->hw_queues_props[i].type = QUEUE_TYPE_INT; 565 prop->hw_queues_props[i].driver_only = 
0; 566 prop->hw_queues_props[i].supports_sync_stream = 0; 567 prop->hw_queues_props[i].cb_alloc_flags = 568 CB_ALLOC_USER; 569 570 } 571 prop->hw_queues_props[i].collective_mode = 572 get_collective_mode(hdev, i); 573 } 574 575 prop->cache_line_size = DEVICE_CACHE_LINE_SIZE; 576 prop->cfg_base_address = CFG_BASE; 577 prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; 578 prop->host_base_address = HOST_PHYS_BASE; 579 prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; 580 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; 581 prop->completion_mode = HL_COMPLETION_MODE_JOB; 582 prop->collective_first_sob = 0; 583 prop->collective_first_mon = 0; 584 585 /* 2 SOBs per internal queue stream are reserved for collective */ 586 prop->sync_stream_first_sob = 587 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR) 588 * QMAN_STREAMS * HL_RSVD_SOBS; 589 590 /* 1 monitor per internal queue stream are reserved for collective 591 * 2 monitors per external queue stream are reserved for collective 592 */ 593 prop->sync_stream_first_mon = 594 (NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) + 595 (NUMBER_OF_EXT_HW_QUEUES * 2); 596 597 prop->dram_base_address = DRAM_PHYS_BASE; 598 prop->dram_size = GAUDI_HBM_SIZE_32GB; 599 prop->dram_end_address = prop->dram_base_address + prop->dram_size; 600 prop->dram_user_base_address = DRAM_BASE_ADDR_USER; 601 602 prop->sram_base_address = SRAM_BASE_ADDR; 603 prop->sram_size = SRAM_SIZE; 604 prop->sram_end_address = prop->sram_base_address + prop->sram_size; 605 prop->sram_user_base_address = 606 prop->sram_base_address + SRAM_USER_BASE_OFFSET; 607 608 prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR; 609 prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE; 610 611 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR; 612 if (hdev->pldm) 613 prop->mmu_pgt_size = 0x800000; /* 8MB */ 614 else 615 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE; 616 prop->mmu_pte_size = HL_PTE_SIZE; 617 prop->dram_page_size = PAGE_SIZE_2MB; 618 prop->device_mem_alloc_default_page_size = prop->dram_page_size; 619 prop->dram_supports_virtual_memory = false; 620 621 prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT; 622 prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT; 623 prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT; 624 prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT; 625 prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT; 626 prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK; 627 prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK; 628 prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK; 629 prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK; 630 prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK; 631 prop->pmmu.start_addr = VA_HOST_SPACE_START; 632 prop->pmmu.end_addr = 633 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; 634 prop->pmmu.page_size = PAGE_SIZE_4KB; 635 prop->pmmu.num_hops = MMU_ARCH_5_HOPS; 636 prop->pmmu.last_mask = LAST_MASK; 637 /* TODO: will be duplicated until implementing per-MMU props */ 638 prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE; 639 prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; 640 641 /* PMMU and HPMMU are the same except of page size */ 642 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu)); 643 prop->pmmu_huge.page_size = PAGE_SIZE_2MB; 644 645 /* shifts and masks are the same in PMMU and DMMU */ 646 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu)); 647 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2); 648 prop->dmmu.end_addr = VA_HOST_SPACE_END; 649 
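	/*
	 * Note the even split of the VA_HOST_SPACE range configured above:
	 * the PMMU (4KB pages, plus the 2MB pmmu_huge variant) covers the
	 * lower half starting at VA_HOST_SPACE_START, while the DMMU
	 * properties cover the upper half up to VA_HOST_SPACE_END.
	 */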
prop->dmmu.page_size = PAGE_SIZE_2MB; 650 prop->dmmu.pgt_size = prop->mmu_pgt_size; 651 652 prop->cfg_size = CFG_SIZE; 653 prop->max_asid = MAX_ASID; 654 prop->num_of_events = GAUDI_EVENT_SIZE; 655 prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE; 656 prop->tpc_enabled_mask = TPC_ENABLED_MASK; 657 658 set_default_power_values(hdev); 659 660 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT; 661 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE; 662 663 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; 664 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; 665 666 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 667 CARD_NAME_MAX_LEN); 668 669 prop->max_pending_cs = GAUDI_MAX_PENDING_CS; 670 671 prop->first_available_user_sob[HL_GAUDI_WS_DCORE] = 672 prop->sync_stream_first_sob + 673 (num_sync_stream_queues * HL_RSVD_SOBS); 674 prop->first_available_user_mon[HL_GAUDI_WS_DCORE] = 675 prop->sync_stream_first_mon + 676 (num_sync_stream_queues * HL_RSVD_MONS); 677 678 prop->first_available_user_interrupt = USHRT_MAX; 679 prop->tpc_interrupt_id = USHRT_MAX; 680 681 /* single msi */ 682 prop->eq_interrupt_id = 0; 683 684 for (i = 0 ; i < HL_MAX_DCORES ; i++) 685 prop->first_available_cq[i] = USHRT_MAX; 686 687 prop->fw_cpu_boot_dev_sts0_valid = false; 688 prop->fw_cpu_boot_dev_sts1_valid = false; 689 prop->hard_reset_done_by_fw = false; 690 prop->gic_interrupts_enable = true; 691 692 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 693 694 prop->clk_pll_index = HL_GAUDI_MME_PLL; 695 prop->max_freq_value = GAUDI_MAX_CLK_FREQ; 696 697 prop->use_get_power_for_reset_history = true; 698 699 prop->configurable_stop_on_err = true; 700 701 prop->set_max_power_on_device_init = true; 702 703 prop->dma_mask = 48; 704 705 prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL; 706 707 return 0; 708 } 709 710 static int gaudi_pci_bars_map(struct hl_device *hdev) 711 { 712 static const char * const name[] = {"SRAM", "CFG", "HBM"}; 713 bool is_wc[3] = {false, false, true}; 714 int rc; 715 716 rc = hl_pci_bars_map(hdev, name, is_wc); 717 if (rc) 718 return rc; 719 720 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] + 721 (CFG_BASE - SPI_FLASH_BASE_ADDR); 722 723 return 0; 724 } 725 726 static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr) 727 { 728 struct gaudi_device *gaudi = hdev->asic_specific; 729 struct hl_inbound_pci_region pci_region; 730 u64 old_addr = addr; 731 int rc; 732 733 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr)) 734 return old_addr; 735 736 if (hdev->asic_prop.iatu_done_by_fw) 737 return U64_MAX; 738 739 /* Inbound Region 2 - Bar 4 - Point to HBM */ 740 pci_region.mode = PCI_BAR_MATCH_MODE; 741 pci_region.bar = HBM_BAR_ID; 742 pci_region.addr = addr; 743 rc = hl_pci_set_inbound_region(hdev, 2, &pci_region); 744 if (rc) 745 return U64_MAX; 746 747 if (gaudi) { 748 old_addr = gaudi->hbm_bar_cur_addr; 749 gaudi->hbm_bar_cur_addr = addr; 750 } 751 752 return old_addr; 753 } 754 755 static int gaudi_init_iatu(struct hl_device *hdev) 756 { 757 struct hl_inbound_pci_region inbound_region; 758 struct hl_outbound_pci_region outbound_region; 759 int rc; 760 761 if (hdev->asic_prop.iatu_done_by_fw) 762 return 0; 763 764 /* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */ 765 inbound_region.mode = PCI_BAR_MATCH_MODE; 766 inbound_region.bar = SRAM_BAR_ID; 767 inbound_region.addr = SRAM_BASE_ADDR; 768 rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region); 769 if (rc) 770 goto done; 771 772 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */ 773 inbound_region.mode = PCI_BAR_MATCH_MODE; 
774 inbound_region.bar = CFG_BAR_ID; 775 inbound_region.addr = SPI_FLASH_BASE_ADDR; 776 rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region); 777 if (rc) 778 goto done; 779 780 /* Inbound Region 2 - Bar 4 - Point to HBM */ 781 inbound_region.mode = PCI_BAR_MATCH_MODE; 782 inbound_region.bar = HBM_BAR_ID; 783 inbound_region.addr = DRAM_PHYS_BASE; 784 rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region); 785 if (rc) 786 goto done; 787 788 /* Outbound Region 0 - Point to Host */ 789 outbound_region.addr = HOST_PHYS_BASE; 790 outbound_region.size = HOST_PHYS_SIZE; 791 rc = hl_pci_set_outbound_region(hdev, &outbound_region); 792 793 done: 794 return rc; 795 } 796 797 static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev) 798 { 799 return RREG32(mmHW_STATE); 800 } 801 802 static int gaudi_early_init(struct hl_device *hdev) 803 { 804 struct asic_fixed_properties *prop = &hdev->asic_prop; 805 struct pci_dev *pdev = hdev->pdev; 806 resource_size_t pci_bar_size; 807 u32 fw_boot_status; 808 int rc; 809 810 rc = gaudi_set_fixed_properties(hdev); 811 if (rc) { 812 dev_err(hdev->dev, "Failed setting fixed properties\n"); 813 return rc; 814 } 815 816 /* Check BAR sizes */ 817 pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID); 818 819 if (pci_bar_size != SRAM_BAR_SIZE) { 820 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 821 SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE); 822 rc = -ENODEV; 823 goto free_queue_props; 824 } 825 826 pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID); 827 828 if (pci_bar_size != CFG_BAR_SIZE) { 829 dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n", 830 CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE); 831 rc = -ENODEV; 832 goto free_queue_props; 833 } 834 835 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID); 836 hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID); 837 838 /* If FW security is enabled at this point it means no access to ELBI */ 839 if (hdev->asic_prop.fw_security_enabled) { 840 hdev->asic_prop.iatu_done_by_fw = true; 841 842 /* 843 * GIC-security-bit can ONLY be set by CPUCP, so in this stage 844 * decision can only be taken based on PCI ID security. 845 */ 846 hdev->asic_prop.gic_interrupts_enable = false; 847 goto pci_init; 848 } 849 850 rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, 851 &fw_boot_status); 852 if (rc) 853 goto free_queue_props; 854 855 /* Check whether FW is configuring iATU */ 856 if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && 857 (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) 858 hdev->asic_prop.iatu_done_by_fw = true; 859 860 pci_init: 861 rc = hl_pci_init(hdev); 862 if (rc) 863 goto free_queue_props; 864 865 /* Before continuing in the initialization, we need to read the preboot 866 * version to determine whether we run with a security-enabled firmware 867 */ 868 rc = hl_fw_read_preboot_status(hdev); 869 if (rc) { 870 if (hdev->reset_on_preboot_fail) 871 /* we are already on failure flow, so don't check if hw_fini fails. 
*/ 872 hdev->asic_funcs->hw_fini(hdev, true, false); 873 goto pci_fini; 874 } 875 876 if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) { 877 dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n"); 878 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 879 if (rc) { 880 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); 881 goto pci_fini; 882 } 883 } 884 885 return 0; 886 887 pci_fini: 888 hl_pci_fini(hdev); 889 free_queue_props: 890 kfree(hdev->asic_prop.hw_queues_props); 891 return rc; 892 } 893 894 static int gaudi_early_fini(struct hl_device *hdev) 895 { 896 kfree(hdev->asic_prop.hw_queues_props); 897 hl_pci_fini(hdev); 898 899 return 0; 900 } 901 902 /** 903 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values 904 * 905 * @hdev: pointer to hl_device structure 906 * 907 */ 908 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) 909 { 910 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; 911 struct asic_fixed_properties *prop = &hdev->asic_prop; 912 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; 913 int rc; 914 915 if ((hdev->fw_components & FW_TYPE_LINUX) && 916 (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { 917 struct gaudi_device *gaudi = hdev->asic_specific; 918 919 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 920 return 0; 921 922 rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); 923 924 if (rc) 925 return rc; 926 927 freq = pll_freq_arr[2]; 928 } else { 929 /* Backward compatibility */ 930 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2); 931 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2); 932 nr = RREG32(mmPSOC_CPU_PLL_NR); 933 nf = RREG32(mmPSOC_CPU_PLL_NF); 934 od = RREG32(mmPSOC_CPU_PLL_OD); 935 936 if (div_sel == DIV_SEL_REF_CLK || 937 div_sel == DIV_SEL_DIVIDED_REF) { 938 if (div_sel == DIV_SEL_REF_CLK) 939 freq = PLL_REF_CLK; 940 else 941 freq = PLL_REF_CLK / (div_fctr + 1); 942 } else if (div_sel == DIV_SEL_PLL_CLK || 943 div_sel == DIV_SEL_DIVIDED_PLL) { 944 pll_clk = PLL_REF_CLK * (nf + 1) / 945 ((nr + 1) * (od + 1)); 946 if (div_sel == DIV_SEL_PLL_CLK) 947 freq = pll_clk; 948 else 949 freq = pll_clk / (div_fctr + 1); 950 } else { 951 dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel); 952 freq = 0; 953 } 954 } 955 956 prop->psoc_timestamp_frequency = freq; 957 prop->psoc_pci_pll_nr = nr; 958 prop->psoc_pci_pll_nf = nf; 959 prop->psoc_pci_pll_od = od; 960 prop->psoc_pci_pll_div_factor = div_fctr; 961 962 return 0; 963 } 964 965 static int _gaudi_init_tpc_mem(struct hl_device *hdev, 966 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size) 967 { 968 struct asic_fixed_properties *prop = &hdev->asic_prop; 969 struct packet_lin_dma *init_tpc_mem_pkt; 970 struct hl_cs_job *job; 971 struct hl_cb *cb; 972 u64 dst_addr; 973 u32 cb_size, ctl; 974 u8 tpc_id; 975 int rc; 976 977 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 978 if (!cb) 979 return -EFAULT; 980 981 init_tpc_mem_pkt = cb->kernel_address; 982 cb_size = sizeof(*init_tpc_mem_pkt); 983 memset(init_tpc_mem_pkt, 0, cb_size); 984 985 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size); 986 987 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 988 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 989 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 990 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 991 992 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); 993 994 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); 995 996 /* TPC_CMD is configured with I$ prefetch enabled, so address 
should be aligned to 8KB */ 997 dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK, 998 round_up(prop->sram_user_base_address, SZ_8K)); 999 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); 1000 1001 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 1002 if (!job) { 1003 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1004 rc = -ENOMEM; 1005 goto release_cb; 1006 } 1007 1008 job->id = 0; 1009 job->user_cb = cb; 1010 atomic_inc(&job->user_cb->cs_cnt); 1011 job->user_cb_size = cb_size; 1012 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 1013 job->patched_cb = job->user_cb; 1014 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 1015 1016 hl_debugfs_add_job(hdev, job); 1017 1018 rc = gaudi_send_job_on_qman0(hdev, job); 1019 1020 if (rc) 1021 goto free_job; 1022 1023 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 1024 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id); 1025 if (rc) 1026 break; 1027 } 1028 1029 free_job: 1030 hl_userptr_delete_list(hdev, &job->userptr_list); 1031 hl_debugfs_remove_job(hdev, job); 1032 kfree(job); 1033 atomic_dec(&cb->cs_cnt); 1034 1035 release_cb: 1036 hl_cb_put(cb); 1037 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1038 1039 return rc; 1040 } 1041 1042 /* 1043 * gaudi_init_tpc_mem() - Initialize TPC memories. 1044 * @hdev: Pointer to hl_device structure. 1045 * 1046 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories. 1047 * 1048 * Return: 0 for success, negative value for error. 1049 */ 1050 static int gaudi_init_tpc_mem(struct hl_device *hdev) 1051 { 1052 const struct firmware *fw; 1053 size_t fw_size; 1054 void *cpu_addr; 1055 dma_addr_t dma_handle; 1056 int rc, count = 5; 1057 1058 again: 1059 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev); 1060 if (rc == -EINTR && count-- > 0) { 1061 msleep(50); 1062 goto again; 1063 } 1064 1065 if (rc) { 1066 dev_err(hdev->dev, "Failed to load firmware file %s\n", 1067 GAUDI_TPC_FW_FILE); 1068 goto out; 1069 } 1070 1071 fw_size = fw->size; 1072 cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO); 1073 if (!cpu_addr) { 1074 dev_err(hdev->dev, 1075 "Failed to allocate %zu of dma memory for TPC kernel\n", 1076 fw_size); 1077 rc = -ENOMEM; 1078 goto out; 1079 } 1080 1081 memcpy(cpu_addr, fw->data, fw_size); 1082 1083 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size); 1084 1085 hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle); 1086 1087 out: 1088 release_firmware(fw); 1089 return rc; 1090 } 1091 1092 static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream) 1093 { 1094 struct gaudi_device *gaudi = hdev->asic_specific; 1095 struct gaudi_collective_properties *prop = &gaudi->collective_props; 1096 struct hl_hw_queue *q; 1097 u32 i, sob_id, sob_group_id, queue_id; 1098 1099 /* Iterate through SOB groups and assign a SOB for each slave queue */ 1100 sob_group_id = 1101 stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream]; 1102 sob_id = prop->hw_sob_group[sob_group_id].base_sob_id; 1103 1104 queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1105 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 1106 q = &hdev->kernel_queues[queue_id + (4 * i)]; 1107 q->sync_stream_prop.collective_sob_id = sob_id + i; 1108 } 1109 1110 /* Both DMA5 and TPC7 use the same resources since only a single 1111 * engine need to participate in the reduction process 1112 */ 1113 queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1114 q = &hdev->kernel_queues[queue_id]; 1115 
q->sync_stream_prop.collective_sob_id = 1116 sob_id + NIC_NUMBER_OF_ENGINES; 1117 1118 queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1119 q = &hdev->kernel_queues[queue_id]; 1120 q->sync_stream_prop.collective_sob_id = 1121 sob_id + NIC_NUMBER_OF_ENGINES; 1122 } 1123 1124 static void gaudi_sob_group_hw_reset(struct kref *ref) 1125 { 1126 struct gaudi_hw_sob_group *hw_sob_group = 1127 container_of(ref, struct gaudi_hw_sob_group, kref); 1128 struct hl_device *hdev = hw_sob_group->hdev; 1129 int i; 1130 1131 for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++) 1132 WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 1133 (hw_sob_group->base_sob_id * 4) + (i * 4)), 0); 1134 1135 kref_init(&hw_sob_group->kref); 1136 } 1137 1138 static void gaudi_sob_group_reset_error(struct kref *ref) 1139 { 1140 struct gaudi_hw_sob_group *hw_sob_group = 1141 container_of(ref, struct gaudi_hw_sob_group, kref); 1142 struct hl_device *hdev = hw_sob_group->hdev; 1143 1144 dev_crit(hdev->dev, 1145 "SOB release shouldn't be called here, base_sob_id: %d\n", 1146 hw_sob_group->base_sob_id); 1147 } 1148 1149 static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi) 1150 { 1151 struct gaudi_collective_properties *prop; 1152 int i; 1153 1154 prop = &gaudi->collective_props; 1155 1156 memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask)); 1157 1158 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) 1159 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i)) 1160 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1161 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1162 /* Set collective engine bit */ 1163 prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |= 1164 BIT(i % HL_MAX_SOBS_PER_MONITOR); 1165 } 1166 1167 static int gaudi_collective_init(struct hl_device *hdev) 1168 { 1169 u32 i, sob_id, reserved_sobs_per_group; 1170 struct gaudi_collective_properties *prop; 1171 struct gaudi_device *gaudi; 1172 1173 gaudi = hdev->asic_specific; 1174 prop = &gaudi->collective_props; 1175 sob_id = hdev->asic_prop.collective_first_sob; 1176 1177 /* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */ 1178 reserved_sobs_per_group = 1179 ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR); 1180 1181 /* Init SOB groups */ 1182 for (i = 0 ; i < NUM_SOB_GROUPS; i++) { 1183 prop->hw_sob_group[i].hdev = hdev; 1184 prop->hw_sob_group[i].base_sob_id = sob_id; 1185 sob_id += reserved_sobs_per_group; 1186 gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref); 1187 } 1188 1189 for (i = 0 ; i < QMAN_STREAMS; i++) { 1190 prop->next_sob_group_val[i] = 1; 1191 prop->curr_sob_group_idx[i] = 0; 1192 gaudi_collective_map_sobs(hdev, i); 1193 } 1194 1195 gaudi_collective_mstr_sob_mask_set(gaudi); 1196 1197 return 0; 1198 } 1199 1200 static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group) 1201 { 1202 struct gaudi_device *gaudi = hdev->asic_specific; 1203 struct gaudi_collective_properties *cprop = &gaudi->collective_props; 1204 1205 kref_put(&cprop->hw_sob_group[sob_group].kref, 1206 gaudi_sob_group_hw_reset); 1207 } 1208 1209 static void gaudi_collective_master_init_job(struct hl_device *hdev, 1210 struct hl_cs_job *job, u32 stream, u32 sob_group_offset) 1211 { 1212 u32 master_sob_base, master_monitor, queue_id, cb_size = 0; 1213 struct gaudi_collective_properties *cprop; 1214 struct hl_gen_wait_properties wait_prop; 1215 struct hl_sync_stream_properties *prop; 1216 struct gaudi_device *gaudi; 1217 1218 gaudi = hdev->asic_specific; 1219 cprop = &gaudi->collective_props; 1220 queue_id = job->hw_queue_id; 1221 prop = 
&hdev->kernel_queues[queue_id].sync_stream_prop; 1222 1223 master_sob_base = 1224 cprop->hw_sob_group[sob_group_offset].base_sob_id; 1225 master_monitor = prop->collective_mstr_mon_id[0]; 1226 1227 cprop->hw_sob_group[sob_group_offset].queue_id = queue_id; 1228 1229 dev_dbg(hdev->dev, 1230 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1231 master_sob_base, cprop->mstr_sob_mask[0], 1232 cprop->next_sob_group_val[stream], 1233 master_monitor, queue_id); 1234 1235 wait_prop.data = (void *) job->patched_cb; 1236 wait_prop.sob_base = master_sob_base; 1237 wait_prop.sob_mask = cprop->mstr_sob_mask[0]; 1238 wait_prop.sob_val = cprop->next_sob_group_val[stream]; 1239 wait_prop.mon_id = master_monitor; 1240 wait_prop.q_idx = queue_id; 1241 wait_prop.size = cb_size; 1242 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1243 1244 master_sob_base += HL_MAX_SOBS_PER_MONITOR; 1245 master_monitor = prop->collective_mstr_mon_id[1]; 1246 1247 dev_dbg(hdev->dev, 1248 "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n", 1249 master_sob_base, cprop->mstr_sob_mask[1], 1250 cprop->next_sob_group_val[stream], 1251 master_monitor, queue_id); 1252 1253 wait_prop.sob_base = master_sob_base; 1254 wait_prop.sob_mask = cprop->mstr_sob_mask[1]; 1255 wait_prop.mon_id = master_monitor; 1256 wait_prop.size = cb_size; 1257 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1258 } 1259 1260 static void gaudi_collective_slave_init_job(struct hl_device *hdev, 1261 struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl) 1262 { 1263 struct hl_gen_wait_properties wait_prop; 1264 struct hl_sync_stream_properties *prop; 1265 u32 queue_id, cb_size = 0; 1266 1267 queue_id = job->hw_queue_id; 1268 prop = &hdev->kernel_queues[queue_id].sync_stream_prop; 1269 1270 if (job->cs->encaps_signals) { 1271 /* use the encaps signal handle store earlier in the flow 1272 * and set the SOB information from the encaps 1273 * signals handle 1274 */ 1275 hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job, 1276 cs_cmpl); 1277 1278 dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n", 1279 job->cs->sequence, 1280 cs_cmpl->hw_sob->sob_id, 1281 cs_cmpl->sob_val); 1282 } 1283 1284 /* Add to wait CBs using slave monitor */ 1285 wait_prop.data = (void *) job->user_cb; 1286 wait_prop.sob_base = cs_cmpl->hw_sob->sob_id; 1287 wait_prop.sob_mask = 0x1; 1288 wait_prop.sob_val = cs_cmpl->sob_val; 1289 wait_prop.mon_id = prop->collective_slave_mon_id; 1290 wait_prop.q_idx = queue_id; 1291 wait_prop.size = cb_size; 1292 1293 dev_dbg(hdev->dev, 1294 "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n", 1295 cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, 1296 prop->collective_slave_mon_id, queue_id); 1297 1298 cb_size += gaudi_gen_wait_cb(hdev, &wait_prop); 1299 1300 dev_dbg(hdev->dev, 1301 "generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n", 1302 prop->collective_sob_id, queue_id); 1303 1304 cb_size += gaudi_gen_signal_cb(hdev, job->user_cb, 1305 prop->collective_sob_id, cb_size, false); 1306 } 1307 1308 static int gaudi_collective_wait_init_cs(struct hl_cs *cs) 1309 { 1310 struct hl_cs_compl *signal_cs_cmpl = 1311 container_of(cs->signal_fence, struct hl_cs_compl, base_fence); 1312 struct hl_cs_compl *cs_cmpl = 1313 container_of(cs->fence, struct hl_cs_compl, base_fence); 1314 struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl; 1315 struct gaudi_collective_properties *cprop; 1316 u32 stream, queue_id, sob_group_offset; 1317 struct gaudi_device *gaudi; 1318 struct 
hl_device *hdev; 1319 struct hl_cs_job *job; 1320 struct hl_ctx *ctx; 1321 1322 ctx = cs->ctx; 1323 hdev = ctx->hdev; 1324 gaudi = hdev->asic_specific; 1325 cprop = &gaudi->collective_props; 1326 1327 if (cs->encaps_signals) { 1328 cs_cmpl->hw_sob = handle->hw_sob; 1329 /* at this checkpoint we only need the hw_sob pointer 1330 * for the completion check before start going over the jobs 1331 * of the master/slaves, the sob_value will be taken later on 1332 * in gaudi_collective_slave_init_job depends on each 1333 * job wait offset value. 1334 */ 1335 cs_cmpl->sob_val = 0; 1336 } else { 1337 /* copy the SOB id and value of the signal CS */ 1338 cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob; 1339 cs_cmpl->sob_val = signal_cs_cmpl->sob_val; 1340 } 1341 1342 /* check again if the signal cs already completed. 1343 * if yes then don't send any wait cs since the hw_sob 1344 * could be in reset already. if signal is not completed 1345 * then get refcount to hw_sob to prevent resetting the sob 1346 * while wait cs is not submitted. 1347 * note that this check is protected by two locks, 1348 * hw queue lock and completion object lock, 1349 * and the same completion object lock also protects 1350 * the hw_sob reset handler function. 1351 * The hw_queue lock prevent out of sync of hw_sob 1352 * refcount value, changed by signal/wait flows. 1353 */ 1354 spin_lock(&signal_cs_cmpl->lock); 1355 1356 if (completion_done(&cs->signal_fence->completion)) { 1357 spin_unlock(&signal_cs_cmpl->lock); 1358 return -EINVAL; 1359 } 1360 /* Increment kref since all slave queues are now waiting on it */ 1361 kref_get(&cs_cmpl->hw_sob->kref); 1362 1363 spin_unlock(&signal_cs_cmpl->lock); 1364 1365 /* Calculate the stream from collective master queue (1st job) */ 1366 job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node); 1367 stream = job->hw_queue_id % 4; 1368 sob_group_offset = 1369 stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream]; 1370 1371 list_for_each_entry(job, &cs->job_list, cs_node) { 1372 queue_id = job->hw_queue_id; 1373 1374 if (hdev->kernel_queues[queue_id].collective_mode == 1375 HL_COLLECTIVE_MASTER) 1376 gaudi_collective_master_init_job(hdev, job, stream, 1377 sob_group_offset); 1378 else 1379 gaudi_collective_slave_init_job(hdev, job, cs_cmpl); 1380 } 1381 1382 cs_cmpl->sob_group = sob_group_offset; 1383 1384 /* Handle sob group kref and wraparound */ 1385 kref_get(&cprop->hw_sob_group[sob_group_offset].kref); 1386 cprop->next_sob_group_val[stream]++; 1387 1388 if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) { 1389 /* 1390 * Decrement as we reached the max value. 1391 * The release function won't be called here as we've 1392 * just incremented the refcount. 
1393 */ 1394 kref_put(&cprop->hw_sob_group[sob_group_offset].kref, 1395 gaudi_sob_group_reset_error); 1396 cprop->next_sob_group_val[stream] = 1; 1397 /* only two SOBs are currently in use */ 1398 cprop->curr_sob_group_idx[stream] = 1399 (cprop->curr_sob_group_idx[stream] + 1) & 1400 (HL_RSVD_SOBS - 1); 1401 1402 gaudi_collective_map_sobs(hdev, stream); 1403 1404 dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n", 1405 cprop->curr_sob_group_idx[stream], stream); 1406 } 1407 1408 mb(); 1409 hl_fence_put(cs->signal_fence); 1410 cs->signal_fence = NULL; 1411 1412 return 0; 1413 } 1414 1415 static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size) 1416 { 1417 u32 cacheline_end, additional_commands; 1418 1419 cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE); 1420 additional_commands = sizeof(struct packet_msg_prot) * 2; 1421 1422 if (user_cb_size + additional_commands > cacheline_end) 1423 return cacheline_end - user_cb_size + additional_commands; 1424 else 1425 return additional_commands; 1426 } 1427 1428 static int gaudi_collective_wait_create_job(struct hl_device *hdev, 1429 struct hl_ctx *ctx, struct hl_cs *cs, 1430 enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id, 1431 u32 encaps_signal_offset) 1432 { 1433 struct hw_queue_properties *hw_queue_prop; 1434 struct hl_cs_counters_atomic *cntr; 1435 struct hl_cs_job *job; 1436 struct hl_cb *cb; 1437 u32 cb_size; 1438 bool patched_cb; 1439 1440 cntr = &hdev->aggregated_cs_counters; 1441 1442 if (mode == HL_COLLECTIVE_MASTER) { 1443 /* CB size of collective master queue contains 1444 * 4 msg short packets for monitor 1 configuration 1445 * 1 fence packet 1446 * 4 msg short packets for monitor 2 configuration 1447 * 1 fence packet 1448 * 2 msg prot packets for completion and MSI 1449 */ 1450 cb_size = sizeof(struct packet_msg_short) * 8 + 1451 sizeof(struct packet_fence) * 2 + 1452 sizeof(struct packet_msg_prot) * 2; 1453 patched_cb = true; 1454 } else { 1455 /* CB size of collective slave queues contains 1456 * 4 msg short packets for monitor configuration 1457 * 1 fence packet 1458 * 1 additional msg short packet for sob signal 1459 */ 1460 cb_size = sizeof(struct packet_msg_short) * 5 + 1461 sizeof(struct packet_fence); 1462 patched_cb = false; 1463 } 1464 1465 hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id]; 1466 job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true); 1467 if (!job) { 1468 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1469 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1470 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1471 return -ENOMEM; 1472 } 1473 1474 /* Allocate internal mapped CB for non patched CBs */ 1475 cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb); 1476 if (!cb) { 1477 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1478 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1479 kfree(job); 1480 return -EFAULT; 1481 } 1482 1483 job->id = 0; 1484 job->cs = cs; 1485 job->user_cb = cb; 1486 atomic_inc(&job->user_cb->cs_cnt); 1487 job->user_cb_size = cb_size; 1488 job->hw_queue_id = queue_id; 1489 1490 /* since its guaranteed to have only one chunk in the collective wait 1491 * cs, we can use this chunk to set the encapsulated signal offset 1492 * in the jobs. 1493 */ 1494 if (cs->encaps_signals) 1495 job->encaps_sig_wait_offset = encaps_signal_offset; 1496 1497 /* 1498 * No need in parsing, user CB is the patched CB. 
1499 * We call hl_cb_destroy() out of two reasons - we don't need 1500 * the CB in the CB idr anymore and to decrement its refcount as 1501 * it was incremented inside hl_cb_kernel_create(). 1502 */ 1503 if (patched_cb) 1504 job->patched_cb = job->user_cb; 1505 else 1506 job->patched_cb = NULL; 1507 1508 job->job_cb_size = job->user_cb_size; 1509 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1510 1511 /* increment refcount as for external queues we get completion */ 1512 if (hw_queue_prop->type == QUEUE_TYPE_EXT) 1513 cs_get(cs); 1514 1515 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1516 1517 list_add_tail(&job->cs_node, &cs->job_list); 1518 1519 hl_debugfs_add_job(hdev, job); 1520 1521 return 0; 1522 } 1523 1524 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev, 1525 struct hl_ctx *ctx, struct hl_cs *cs, 1526 u32 wait_queue_id, u32 collective_engine_id, 1527 u32 encaps_signal_offset) 1528 { 1529 struct gaudi_device *gaudi = hdev->asic_specific; 1530 struct hw_queue_properties *hw_queue_prop; 1531 u32 queue_id, collective_queue, num_jobs; 1532 u32 stream, nic_queue, nic_idx = 0; 1533 bool skip; 1534 int i, rc = 0; 1535 1536 /* Verify wait queue id is configured as master */ 1537 hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id]; 1538 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 1539 dev_err(hdev->dev, 1540 "Queue %d is not configured as collective master\n", 1541 wait_queue_id); 1542 return -EINVAL; 1543 } 1544 1545 /* Verify engine id is supported */ 1546 if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 && 1547 collective_engine_id != GAUDI_ENGINE_ID_TPC_7) { 1548 dev_err(hdev->dev, 1549 "Collective wait does not support engine %u\n", 1550 collective_engine_id); 1551 return -EINVAL; 1552 } 1553 1554 stream = wait_queue_id % 4; 1555 1556 if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5) 1557 collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream; 1558 else 1559 collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream; 1560 1561 num_jobs = NUMBER_OF_SOBS_IN_GRP + 1; 1562 nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream; 1563 1564 /* First job goes to the collective master queue, it will wait for 1565 * the collective slave queues to finish execution. 1566 * The synchronization is done using two monitors: 1567 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the 1568 * reduction engine (DMA5/TPC7). 1569 * 1570 * Rest of the jobs goes to the collective slave queues which will 1571 * all wait for the user to signal sob 'cs_cmpl->sob_val'. 
1572 */ 1573 for (i = 0 ; i < num_jobs ; i++) { 1574 if (i == 0) { 1575 queue_id = wait_queue_id; 1576 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1577 HL_COLLECTIVE_MASTER, queue_id, 1578 wait_queue_id, encaps_signal_offset); 1579 } else { 1580 if (nic_idx < NIC_NUMBER_OF_ENGINES) { 1581 if (gaudi->hw_cap_initialized & 1582 BIT(HW_CAP_NIC_SHIFT + nic_idx)) 1583 skip = false; 1584 else 1585 skip = true; 1586 1587 queue_id = nic_queue; 1588 nic_queue += 4; 1589 nic_idx++; 1590 1591 if (skip) 1592 continue; 1593 } else { 1594 queue_id = collective_queue; 1595 } 1596 1597 rc = gaudi_collective_wait_create_job(hdev, ctx, cs, 1598 HL_COLLECTIVE_SLAVE, queue_id, 1599 wait_queue_id, encaps_signal_offset); 1600 } 1601 1602 if (rc) 1603 return rc; 1604 } 1605 1606 return rc; 1607 } 1608 1609 static int gaudi_late_init(struct hl_device *hdev) 1610 { 1611 struct gaudi_device *gaudi = hdev->asic_specific; 1612 int rc; 1613 1614 rc = gaudi->cpucp_info_get(hdev); 1615 if (rc) { 1616 dev_err(hdev->dev, "Failed to get cpucp info\n"); 1617 return rc; 1618 } 1619 1620 if ((hdev->card_type == cpucp_card_type_pci) && 1621 (hdev->nic_ports_mask & 0x3)) { 1622 dev_info(hdev->dev, 1623 "PCI card detected, only 8 ports are enabled\n"); 1624 hdev->nic_ports_mask &= ~0x3; 1625 1626 /* Stop and disable unused NIC QMANs */ 1627 WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1628 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1629 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1630 1631 WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 1632 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 1633 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 1634 1635 WREG32(mmNIC0_QM0_GLBL_CFG0, 0); 1636 WREG32(mmNIC0_QM1_GLBL_CFG0, 0); 1637 1638 gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1); 1639 } 1640 1641 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0); 1642 if (rc) 1643 return rc; 1644 1645 /* Scrub both SRAM and DRAM */ 1646 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1647 if (rc) 1648 goto disable_pci_access; 1649 1650 rc = gaudi_fetch_psoc_frequency(hdev); 1651 if (rc) { 1652 dev_err(hdev->dev, "Failed to fetch psoc frequency\n"); 1653 goto disable_pci_access; 1654 } 1655 1656 rc = gaudi_mmu_clear_pgt_range(hdev); 1657 if (rc) { 1658 dev_err(hdev->dev, "Failed to clear MMU page tables range\n"); 1659 goto disable_pci_access; 1660 } 1661 1662 rc = gaudi_init_tpc_mem(hdev); 1663 if (rc) { 1664 dev_err(hdev->dev, "Failed to initialize TPC memories\n"); 1665 goto disable_pci_access; 1666 } 1667 1668 rc = gaudi_collective_init(hdev); 1669 if (rc) { 1670 dev_err(hdev->dev, "Failed to init collective\n"); 1671 goto disable_pci_access; 1672 } 1673 1674 /* We only support a single ASID for the user, so for the sake of optimization, just 1675 * initialize the ASID one time during device initialization with the fixed value of 1 1676 */ 1677 gaudi_mmu_prepare(hdev, 1); 1678 1679 hl_fw_set_pll_profile(hdev); 1680 1681 return 0; 1682 1683 disable_pci_access: 1684 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 1685 1686 return rc; 1687 } 1688 1689 static void gaudi_late_fini(struct hl_device *hdev) 1690 { 1691 hl_hwmon_release_resources(hdev); 1692 } 1693 1694 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) 1695 { 1696 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr; 1697 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}; 1698 int i, j, rc = 0; 1699 1700 /* 1701 * The device CPU works with 40-bits addresses, while bit 39 must be set 1702 * 
to '1' when accessing the host. 1703 * Bits 49:39 of the full host address are saved for a later 1704 * configuration of the HW to perform extension to 50 bits. 1705 * Because there is a single HW register that holds the extension bits, 1706 * these bits must be identical in all allocated range. 1707 */ 1708 1709 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1710 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1711 &dma_addr_arr[i], 1712 GFP_KERNEL | __GFP_ZERO); 1713 if (!virt_addr_arr[i]) { 1714 rc = -ENOMEM; 1715 goto free_dma_mem_arr; 1716 } 1717 1718 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1719 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1720 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1721 break; 1722 } 1723 1724 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1725 dev_err(hdev->dev, 1726 "MSB of CPU accessible DMA memory are not identical in all range\n"); 1727 rc = -EFAULT; 1728 goto free_dma_mem_arr; 1729 } 1730 1731 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1732 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1733 hdev->cpu_pci_msb_addr = 1734 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1735 1736 if (!hdev->asic_prop.fw_security_enabled) 1737 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1738 1739 free_dma_mem_arr: 1740 for (j = 0 ; j < i ; j++) 1741 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1742 dma_addr_arr[j]); 1743 1744 return rc; 1745 } 1746 1747 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1748 { 1749 struct gaudi_device *gaudi = hdev->asic_specific; 1750 struct gaudi_internal_qman_info *q; 1751 u32 i; 1752 1753 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1754 q = &gaudi->internal_qmans[i]; 1755 if (!q->pq_kernel_addr) 1756 continue; 1757 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1758 } 1759 } 1760 1761 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1762 { 1763 struct gaudi_device *gaudi = hdev->asic_specific; 1764 struct gaudi_internal_qman_info *q; 1765 int rc, i; 1766 1767 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1768 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1769 continue; 1770 1771 q = &gaudi->internal_qmans[i]; 1772 1773 switch (i) { 1774 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1775 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1776 break; 1777 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1778 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1779 break; 1780 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1781 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1782 break; 1783 case GAUDI_QUEUE_ID_NIC_0_0 ... 
GAUDI_QUEUE_ID_NIC_9_3: 1784 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1785 break; 1786 default: 1787 dev_err(hdev->dev, "Bad internal queue index %d", i); 1788 rc = -EINVAL; 1789 goto free_internal_qmans_pq_mem; 1790 } 1791 1792 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1793 GFP_KERNEL | __GFP_ZERO); 1794 if (!q->pq_kernel_addr) { 1795 rc = -ENOMEM; 1796 goto free_internal_qmans_pq_mem; 1797 } 1798 } 1799 1800 return 0; 1801 1802 free_internal_qmans_pq_mem: 1803 gaudi_free_internal_qmans_pq_mem(hdev); 1804 return rc; 1805 } 1806 1807 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1808 { 1809 struct asic_fixed_properties *prop = &hdev->asic_prop; 1810 struct pci_mem_region *region; 1811 1812 /* CFG */ 1813 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1814 region->region_base = CFG_BASE; 1815 region->region_size = CFG_SIZE; 1816 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1817 region->bar_size = CFG_BAR_SIZE; 1818 region->bar_id = CFG_BAR_ID; 1819 region->used = 1; 1820 1821 /* SRAM */ 1822 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1823 region->region_base = SRAM_BASE_ADDR; 1824 region->region_size = SRAM_SIZE; 1825 region->offset_in_bar = 0; 1826 region->bar_size = SRAM_BAR_SIZE; 1827 region->bar_id = SRAM_BAR_ID; 1828 region->used = 1; 1829 1830 /* DRAM */ 1831 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1832 region->region_base = DRAM_PHYS_BASE; 1833 region->region_size = hdev->asic_prop.dram_size; 1834 region->offset_in_bar = 0; 1835 region->bar_size = prop->dram_pci_bar_size; 1836 region->bar_id = HBM_BAR_ID; 1837 region->used = 1; 1838 1839 /* SP SRAM */ 1840 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1841 region->region_base = PSOC_SCRATCHPAD_ADDR; 1842 region->region_size = PSOC_SCRATCHPAD_SIZE; 1843 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1844 region->bar_size = CFG_BAR_SIZE; 1845 region->bar_id = CFG_BAR_ID; 1846 region->used = 1; 1847 } 1848 1849 static int gaudi_sw_init(struct hl_device *hdev) 1850 { 1851 struct gaudi_device *gaudi; 1852 u32 i, event_id = 0; 1853 int rc; 1854 1855 /* Allocate device structure */ 1856 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1857 if (!gaudi) 1858 return -ENOMEM; 1859 1860 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1861 if (gaudi_irq_map_table[i].valid) { 1862 if (event_id == GAUDI_EVENT_SIZE) { 1863 dev_err(hdev->dev, 1864 "Event array exceeds the limit of %u events\n", 1865 GAUDI_EVENT_SIZE); 1866 rc = -EINVAL; 1867 goto free_gaudi_device; 1868 } 1869 1870 gaudi->events[event_id++] = 1871 gaudi_irq_map_table[i].fc_id; 1872 } 1873 } 1874 1875 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1876 1877 hdev->asic_specific = gaudi; 1878 1879 /* Create DMA pool for small allocations */ 1880 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1881 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1882 if (!hdev->dma_pool) { 1883 dev_err(hdev->dev, "failed to create DMA pool\n"); 1884 rc = -ENOMEM; 1885 goto free_gaudi_device; 1886 } 1887 1888 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1889 if (rc) 1890 goto free_dma_pool; 1891 1892 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1893 if (!hdev->cpu_accessible_dma_pool) { 1894 dev_err(hdev->dev, 1895 "Failed to create CPU accessible DMA pool\n"); 1896 rc = -ENOMEM; 1897 goto free_cpu_dma_mem; 1898 } 1899 1900 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1901 (uintptr_t) hdev->cpu_accessible_dma_mem, 1902 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1903 if 
(rc) { 1904 dev_err(hdev->dev, 1905 "Failed to add memory to CPU accessible DMA pool\n"); 1906 rc = -EFAULT; 1907 goto free_cpu_accessible_dma_pool; 1908 } 1909 1910 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1911 if (rc) 1912 goto free_cpu_accessible_dma_pool; 1913 1914 spin_lock_init(&gaudi->hw_queues_lock); 1915 1916 hdev->supports_sync_stream = true; 1917 hdev->supports_coresight = true; 1918 hdev->supports_staged_submission = true; 1919 hdev->supports_wait_for_multi_cs = true; 1920 1921 hdev->asic_funcs->set_pci_memory_regions(hdev); 1922 hdev->stream_master_qid_arr = 1923 hdev->asic_funcs->get_stream_master_qid_arr(); 1924 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1925 1926 return 0; 1927 1928 free_cpu_accessible_dma_pool: 1929 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1930 free_cpu_dma_mem: 1931 if (!hdev->asic_prop.fw_security_enabled) 1932 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1933 hdev->cpu_pci_msb_addr); 1934 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1935 hdev->cpu_accessible_dma_address); 1936 free_dma_pool: 1937 dma_pool_destroy(hdev->dma_pool); 1938 free_gaudi_device: 1939 kfree(gaudi); 1940 return rc; 1941 } 1942 1943 static int gaudi_sw_fini(struct hl_device *hdev) 1944 { 1945 struct gaudi_device *gaudi = hdev->asic_specific; 1946 1947 gaudi_free_internal_qmans_pq_mem(hdev); 1948 1949 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1950 1951 if (!hdev->asic_prop.fw_security_enabled) 1952 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1953 hdev->cpu_pci_msb_addr); 1954 1955 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1956 hdev->cpu_accessible_dma_address); 1957 1958 dma_pool_destroy(hdev->dma_pool); 1959 1960 kfree(gaudi); 1961 1962 return 0; 1963 } 1964 1965 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1966 { 1967 struct hl_device *hdev = arg; 1968 int i; 1969 1970 if (hdev->disabled) 1971 return IRQ_HANDLED; 1972 1973 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1974 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1975 1976 hl_irq_handler_eq(irq, &hdev->event_queue); 1977 1978 return IRQ_HANDLED; 1979 } 1980 1981 /* 1982 * For backward compatibility, new MSI interrupts should be set after the 1983 * existing CPU and NIC interrupts. 1984 */ 1985 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1986 bool cpu_eq) 1987 { 1988 int msi_vec; 1989 1990 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1991 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1992 GAUDI_EVENT_QUEUE_MSI_IDX); 1993 1994 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1995 (nr + NIC_NUMBER_OF_ENGINES + 1); 1996 1997 return pci_irq_vector(hdev->pdev, msi_vec); 1998 } 1999 2000 static int gaudi_enable_msi_single(struct hl_device *hdev) 2001 { 2002 int rc, irq; 2003 2004 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2005 2006 irq = gaudi_pci_irq_vector(hdev, 0, false); 2007 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2008 "gaudi single msi", hdev); 2009 if (rc) 2010 dev_err(hdev->dev, 2011 "Failed to request single MSI IRQ\n"); 2012 2013 return rc; 2014 } 2015 2016 static int gaudi_enable_msi(struct hl_device *hdev) 2017 { 2018 struct gaudi_device *gaudi = hdev->asic_specific; 2019 int rc; 2020 2021 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2022 return 0; 2023 2024 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2025 if (rc < 0) { 2026 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2027 return rc; 2028 } 2029 2030 rc = gaudi_enable_msi_single(hdev); 2031 if (rc) 2032 goto free_pci_irq_vectors; 2033 2034 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2035 2036 return 0; 2037 2038 free_pci_irq_vectors: 2039 pci_free_irq_vectors(hdev->pdev); 2040 return rc; 2041 } 2042 2043 static void gaudi_sync_irqs(struct hl_device *hdev) 2044 { 2045 struct gaudi_device *gaudi = hdev->asic_specific; 2046 2047 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2048 return; 2049 2050 /* Wait for all pending IRQs to be finished */ 2051 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2052 } 2053 2054 static void gaudi_disable_msi(struct hl_device *hdev) 2055 { 2056 struct gaudi_device *gaudi = hdev->asic_specific; 2057 2058 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2059 return; 2060 2061 gaudi_sync_irqs(hdev); 2062 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2063 pci_free_irq_vectors(hdev->pdev); 2064 2065 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2066 } 2067 2068 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2069 { 2070 struct gaudi_device *gaudi = hdev->asic_specific; 2071 2072 if (hdev->asic_prop.fw_security_enabled) 2073 return; 2074 2075 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2076 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2077 return; 2078 2079 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2080 return; 2081 2082 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2083 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2084 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2085 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2086 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2087 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2088 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2089 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2090 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2091 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2092 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2093 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2094 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2095 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2096 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2097 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2098 2099 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2100 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2101 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2102 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2103 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2104 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2105 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2106 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2107 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2108 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2109 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2110 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2111 
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2112 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2113 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2114 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2115 2116 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2117 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2118 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2119 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2120 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2121 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2122 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2123 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2124 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2125 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2126 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2127 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2128 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2129 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2130 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2131 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2132 2133 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2134 } 2135 2136 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2137 { 2138 struct gaudi_device *gaudi = hdev->asic_specific; 2139 2140 if (hdev->asic_prop.fw_security_enabled) 2141 return; 2142 2143 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2144 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2145 return; 2146 2147 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2148 return; 2149 2150 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2151 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2152 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2153 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2154 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2155 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2156 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2157 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2158 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2159 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2160 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2161 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2162 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2163 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2164 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2165 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2166 2167 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2169 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2170 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2171 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2172 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2173 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2174 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2175 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2176 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2177 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2178 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2179 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2180 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2181 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2182 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2183 2184 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2185 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2186 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2187 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2188 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2189 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2190 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2191 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2192 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2193 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2194 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2195 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2196 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2197 1 << 
DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2198 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2199 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2200 2201 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2202 } 2203 2204 static void gaudi_init_e2e(struct hl_device *hdev) 2205 { 2206 if (hdev->asic_prop.fw_security_enabled) 2207 return; 2208 2209 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2210 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2211 return; 2212 2213 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2214 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2215 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2216 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2217 2218 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2219 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2220 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2221 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2222 2223 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2224 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2225 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2226 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2227 2228 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2229 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2230 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2231 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2232 2233 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2234 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2235 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2236 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2237 2238 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2239 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2240 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2241 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2242 2243 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2244 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2245 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2246 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2247 2248 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2249 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2250 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2251 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2252 2253 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2254 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2255 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2256 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2257 2258 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2259 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2260 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2261 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2262 2263 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2264 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2265 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2266 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2267 2268 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2269 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2270 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2271 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2272 2273 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2274 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2275 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2276 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2277 2278 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2279 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2280 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2281 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2282 2283 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2284 
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2285 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2286 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2287 2288 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2289 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2290 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2291 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2292 2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2294 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2296 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2297 2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2299 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2301 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2302 2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2304 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2306 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2307 2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2309 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2311 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2312 2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2314 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2316 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2317 2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2319 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2321 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2322 2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2324 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2326 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2327 2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2329 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2331 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2332 2333 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2334 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2335 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2336 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2337 2338 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2339 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2340 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2341 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2342 2343 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2344 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2345 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2346 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2347 2348 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2349 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2350 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2351 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2352 2353 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2354 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2355 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2356 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2357 2358 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2359 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2360 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2361 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2362 2363 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2364 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2365 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2366 1 << 
IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2367 2368 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2369 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2370 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2371 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2372 2373 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2374 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2375 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2376 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2377 2378 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2379 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2380 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2381 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2382 2383 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2384 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2385 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2386 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2387 2388 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2389 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2390 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2391 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2392 2393 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2394 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2395 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2396 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2397 2398 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2399 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2400 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2401 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2402 2403 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2404 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2405 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2406 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2407 2408 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2409 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2410 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2411 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2412 2413 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2414 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2415 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2416 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2417 2418 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2419 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2420 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2421 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2422 2423 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2424 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2425 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2426 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2427 2428 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2429 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2430 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2431 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2432 2433 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2434 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2435 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2436 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2437 2438 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2439 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2440 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2441 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2442 2443 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2444 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2445 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2446 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2447 2448 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2449 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2450 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2451 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2452 } 2453 2454 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2455 { 2456 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2457 2458 if (hdev->asic_prop.fw_security_enabled) 2459 return; 2460 2461 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2462 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2463 return; 2464 2465 hbm0_wr = 0x33333333; 2466 hbm0_rd = 0x77777777; 2467 
hbm1_wr = 0x55555555; 2468 hbm1_rd = 0xDDDDDDDD; 2469 2470 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2471 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2472 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2473 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2474 2475 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2476 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2477 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2478 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2479 2480 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2481 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2482 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2483 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2484 2485 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2486 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2487 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2488 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2489 2490 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2491 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2492 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2493 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2494 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2495 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2496 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2497 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2498 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2499 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2500 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2501 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2502 2503 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2504 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2505 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2506 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2509 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2510 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2511 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2512 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2513 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2514 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2515 } 2516 2517 static void gaudi_init_golden_registers(struct hl_device *hdev) 2518 { 2519 u32 tpc_offset; 2520 int tpc_id, i; 2521 2522 gaudi_init_e2e(hdev); 2523 gaudi_init_hbm_cred(hdev); 2524 2525 for (tpc_id = 0, tpc_offset = 0; 2526 tpc_id < TPC_NUMBER_OF_ENGINES; 2527 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2528 /* Mask all arithmetic interrupts from TPC */ 2529 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2530 /* Set 16 cache lines */ 2531 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2532 ICACHE_FETCH_LINE_NUM, 2); 2533 } 2534 2535 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2536 for (i = 0 ; i < 128 ; i += 8) 2537 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2538 2539 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2540 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2541 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2542 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2543 } 2544 2545 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2546 int qman_id, dma_addr_t qman_pq_addr) 2547 { 2548 struct cpu_dyn_regs *dyn_regs = 2549 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2550 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2551 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2552 u32 q_off, dma_qm_offset; 2553 u32 dma_qm_err_cfg, irq_handler_offset; 2554 2555 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2556 2557 mtr_base_en_lo = 
lower_32_bits(CFG_BASE + 2558 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2559 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2560 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2561 so_base_en_lo = lower_32_bits(CFG_BASE + 2562 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2563 so_base_en_hi = upper_32_bits(CFG_BASE + 2564 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2565 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2566 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2567 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2568 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2569 so_base_ws_lo = lower_32_bits(CFG_BASE + 2570 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2571 so_base_ws_hi = upper_32_bits(CFG_BASE + 2572 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2573 2574 q_off = dma_qm_offset + qman_id * 4; 2575 2576 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2577 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2578 2579 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2580 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2581 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2582 2583 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2584 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2585 QMAN_LDMA_SRC_OFFSET); 2586 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2587 QMAN_LDMA_DST_OFFSET); 2588 2589 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2590 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2591 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2592 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2593 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2594 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2595 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2596 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2597 2598 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2599 2600 /* The following configuration is needed only once per QMAN */ 2601 if (qman_id == 0) { 2602 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2603 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2604 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2605 2606 /* Configure RAZWI IRQ */ 2607 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2608 if (hdev->stop_on_err) 2609 dma_qm_err_cfg |= 2610 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2611 2612 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2613 2614 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2615 lower_32_bits(CFG_BASE + irq_handler_offset)); 2616 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2617 upper_32_bits(CFG_BASE + irq_handler_offset)); 2618 2619 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2620 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2621 dma_id); 2622 2623 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2624 QM_ARB_ERR_MSG_EN_MASK); 2625 2626 /* Set timeout to maximum */ 2627 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2628 2629 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2630 QMAN_EXTERNAL_MAKE_TRUSTED); 2631 2632 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2633 } 2634 } 2635 2636 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2637 { 2638 struct cpu_dyn_regs *dyn_regs = 2639 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2640 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2641 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2642 u32 irq_handler_offset; 2643 2644 /* Set to maximum possible according to physical size */ 2645 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2646 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2647 2648 /* WA for H/W bug H3-2116 */ 2649 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2650 2651 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2652 if (hdev->stop_on_err) 2653 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2654 2655 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2656 2657 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2658 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2659 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2660 2661 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2662 lower_32_bits(CFG_BASE + irq_handler_offset)); 2663 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2664 upper_32_bits(CFG_BASE + irq_handler_offset)); 2665 2666 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2667 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2668 WREG32(mmDMA0_CORE_PROT + dma_offset, 2669 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2670 /* If the channel is secured, it should be in MMU bypass mode */ 2671 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2672 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2673 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2674 } 2675 2676 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2677 u32 enable_mask) 2678 { 2679 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2680 2681 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2682 } 2683 2684 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2685 { 2686 struct gaudi_device *gaudi = hdev->asic_specific; 2687 struct hl_hw_queue *q; 2688 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2689 2690 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2691 return; 2692 2693 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2694 dma_id = gaudi_dma_assignment[i]; 2695 /* 2696 * For queues after the CPU Q need to add 1 to get the correct 2697 * queue. 
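                 * For example (illustrative only): for PCI DMA engine 1,
                 * stream 2, dma_id == 1, so cpu_skip == 0 and
                 * q_idx == 4 * 1 + 2 + 0 == 6, i.e. the kernel queue of
                 * DMA channel 1 stream 2.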
In addition, need to add the CPU EQ and NIC IRQs in 2698 * order to get the correct MSI register. 2699 */ 2700 if (dma_id > 1) { 2701 cpu_skip = 1; 2702 nic_skip = NIC_NUMBER_OF_ENGINES; 2703 } else { 2704 cpu_skip = 0; 2705 nic_skip = 0; 2706 } 2707 2708 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2709 q_idx = 4 * dma_id + j + cpu_skip; 2710 q = &hdev->kernel_queues[q_idx]; 2711 q->cq_id = cq_id++; 2712 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2713 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2714 q->bus_address); 2715 } 2716 2717 gaudi_init_dma_core(hdev, dma_id); 2718 2719 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2720 } 2721 2722 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2723 } 2724 2725 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2726 int qman_id, u64 qman_base_addr) 2727 { 2728 struct cpu_dyn_regs *dyn_regs = 2729 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2730 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2731 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2732 u32 dma_qm_err_cfg, irq_handler_offset; 2733 u32 q_off, dma_qm_offset; 2734 2735 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2736 2737 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2738 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2739 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2741 so_base_en_lo = lower_32_bits(CFG_BASE + 2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2743 so_base_en_hi = upper_32_bits(CFG_BASE + 2744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2745 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2746 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2747 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2749 so_base_ws_lo = lower_32_bits(CFG_BASE + 2750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2751 so_base_ws_hi = upper_32_bits(CFG_BASE + 2752 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2753 2754 q_off = dma_qm_offset + qman_id * 4; 2755 2756 if (qman_id < 4) { 2757 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2758 lower_32_bits(qman_base_addr)); 2759 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2760 upper_32_bits(qman_base_addr)); 2761 2762 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2763 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2764 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2765 2766 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2767 QMAN_CPDMA_SIZE_OFFSET); 2768 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2769 QMAN_CPDMA_SRC_OFFSET); 2770 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2771 QMAN_CPDMA_DST_OFFSET); 2772 } else { 2773 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2774 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2775 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2776 2777 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2778 QMAN_LDMA_SIZE_OFFSET); 2779 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2780 QMAN_LDMA_SRC_OFFSET); 2781 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2782 QMAN_LDMA_DST_OFFSET); 2783 2784 /* Configure RAZWI IRQ */ 2785 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2786 if (hdev->stop_on_err) 2787 dma_qm_err_cfg |= 2788 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2789 2790 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2791 2792 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2793 lower_32_bits(CFG_BASE + irq_handler_offset)); 2794 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2795 upper_32_bits(CFG_BASE + irq_handler_offset)); 2796 2797 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2798 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2799 dma_id); 2800 2801 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2802 QM_ARB_ERR_MSG_EN_MASK); 2803 2804 /* Set timeout to maximum */ 2805 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2806 2807 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2808 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2809 QMAN_INTERNAL_MAKE_TRUSTED); 2810 } 2811 2812 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2813 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2814 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2815 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2816 2817 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2818 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2819 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2820 mtr_base_ws_lo); 2821 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2822 mtr_base_ws_hi); 2823 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2824 so_base_ws_lo); 2825 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2826 so_base_ws_hi); 2827 } 2828 } 2829 2830 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2831 { 2832 struct gaudi_device *gaudi = hdev->asic_specific; 2833 struct gaudi_internal_qman_info *q; 2834 u64 qman_base_addr; 2835 int i, j, dma_id, internal_q_index; 2836 2837 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2838 return; 2839 2840 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2841 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2842 2843 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2844 /* 2845 * Add the CPU queue in order to get the correct queue 2846 * number as all internal queue are placed after it 2847 */ 2848 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2849 2850 q = &gaudi->internal_qmans[internal_q_index]; 2851 qman_base_addr = (u64) q->pq_dma_addr; 2852 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 2853 qman_base_addr); 2854 } 2855 2856 /* Initializing lower CP for HBM DMA QMAN */ 2857 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2858 2859 gaudi_init_dma_core(hdev, dma_id); 2860 2861 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2862 } 2863 2864 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2865 } 2866 2867 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2868 int qman_id, u64 qman_base_addr) 2869 { 2870 struct cpu_dyn_regs *dyn_regs = 2871 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2872 u32 mtr_base_lo, mtr_base_hi; 2873 u32 so_base_lo, so_base_hi; 2874 u32 irq_handler_offset; 2875 u32 q_off, mme_id; 2876 
u32 mme_qm_err_cfg; 2877 2878 mtr_base_lo = lower_32_bits(CFG_BASE + 2879 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2880 mtr_base_hi = upper_32_bits(CFG_BASE + 2881 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2882 so_base_lo = lower_32_bits(CFG_BASE + 2883 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2884 so_base_hi = upper_32_bits(CFG_BASE + 2885 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2886 2887 q_off = mme_offset + qman_id * 4; 2888 2889 if (qman_id < 4) { 2890 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2891 lower_32_bits(qman_base_addr)); 2892 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2893 upper_32_bits(qman_base_addr)); 2894 2895 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2896 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2897 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2898 2899 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2900 QMAN_CPDMA_SIZE_OFFSET); 2901 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2902 QMAN_CPDMA_SRC_OFFSET); 2903 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2904 QMAN_CPDMA_DST_OFFSET); 2905 } else { 2906 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2907 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2908 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2909 2910 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2911 QMAN_LDMA_SIZE_OFFSET); 2912 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2913 QMAN_LDMA_SRC_OFFSET); 2914 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2915 QMAN_LDMA_DST_OFFSET); 2916 2917 /* Configure RAZWI IRQ */ 2918 mme_id = mme_offset / 2919 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2920 2921 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2922 if (hdev->stop_on_err) 2923 mme_qm_err_cfg |= 2924 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2925 2926 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2927 2928 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2929 lower_32_bits(CFG_BASE + irq_handler_offset)); 2930 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2931 upper_32_bits(CFG_BASE + irq_handler_offset)); 2932 2933 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2934 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2935 mme_id); 2936 2937 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2938 QM_ARB_ERR_MSG_EN_MASK); 2939 2940 /* Set timeout to maximum */ 2941 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 2942 2943 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 2944 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 2945 QMAN_INTERNAL_MAKE_TRUSTED); 2946 } 2947 2948 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2949 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2950 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2951 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2952 } 2953 2954 static void gaudi_init_mme_qmans(struct hl_device *hdev) 2955 { 2956 struct gaudi_device *gaudi = hdev->asic_specific; 2957 struct gaudi_internal_qman_info *q; 2958 u64 qman_base_addr; 2959 u32 mme_offset; 2960 int i, internal_q_index; 2961 2962 if (gaudi->hw_cap_initialized & HW_CAP_MME) 2963 return; 2964 2965 /* 2966 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 2967 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 2968 */ 2969 2970 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2971 2972 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 2973 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 2974 q = &gaudi->internal_qmans[internal_q_index]; 2975 qman_base_addr = (u64) q->pq_dma_addr; 
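                /*
                 * Illustrative note: mme_offset initially points at the
                 * mmMME2_QM block, so iterations i = 0..3 program its four
                 * streams ((i & 0x3) selects the stream); after i == 3 the
                 * offset is reset to 0 and the remaining iterations program
                 * the streams of mmMME0_QM, matching the mapping described
                 * above.
                 */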
2976 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 2977 qman_base_addr); 2978 if (i == 3) 2979 mme_offset = 0; 2980 } 2981 2982 /* Initializing lower CP for MME QMANs */ 2983 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2984 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 2985 gaudi_init_mme_qman(hdev, 0, 4, 0); 2986 2987 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2988 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2989 2990 gaudi->hw_cap_initialized |= HW_CAP_MME; 2991 } 2992 2993 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 2994 int qman_id, u64 qman_base_addr) 2995 { 2996 struct cpu_dyn_regs *dyn_regs = 2997 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2998 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2999 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3000 u32 tpc_qm_err_cfg, irq_handler_offset; 3001 u32 q_off, tpc_id; 3002 3003 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3005 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3007 so_base_en_lo = lower_32_bits(CFG_BASE + 3008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3009 so_base_en_hi = upper_32_bits(CFG_BASE + 3010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3011 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3012 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3013 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3014 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3015 so_base_ws_lo = lower_32_bits(CFG_BASE + 3016 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3017 so_base_ws_hi = upper_32_bits(CFG_BASE + 3018 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3019 3020 q_off = tpc_offset + qman_id * 4; 3021 3022 tpc_id = tpc_offset / 3023 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3024 3025 if (qman_id < 4) { 3026 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3027 lower_32_bits(qman_base_addr)); 3028 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3029 upper_32_bits(qman_base_addr)); 3030 3031 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3032 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3033 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3034 3035 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3036 QMAN_CPDMA_SIZE_OFFSET); 3037 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3038 QMAN_CPDMA_SRC_OFFSET); 3039 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3040 QMAN_CPDMA_DST_OFFSET); 3041 } else { 3042 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3043 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3044 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3045 3046 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3047 QMAN_LDMA_SIZE_OFFSET); 3048 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3049 QMAN_LDMA_SRC_OFFSET); 3050 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3051 QMAN_LDMA_DST_OFFSET); 3052 3053 /* Configure RAZWI IRQ */ 3054 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3055 if (hdev->stop_on_err) 3056 tpc_qm_err_cfg |= 3057 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3058 3059 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3060 3061 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3062 lower_32_bits(CFG_BASE + irq_handler_offset)); 3063 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3064 upper_32_bits(CFG_BASE + irq_handler_offset)); 3065 3066 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3067 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3068 tpc_id); 3069 3070 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3071 QM_ARB_ERR_MSG_EN_MASK); 3072 3073 /* Set timeout to maximum */ 3074 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3075 3076 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3077 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3078 QMAN_INTERNAL_MAKE_TRUSTED); 3079 } 3080 3081 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3082 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3083 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3084 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3085 3086 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3087 if (tpc_id == 6) { 3088 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3089 mtr_base_ws_lo); 3090 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3091 mtr_base_ws_hi); 3092 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3093 so_base_ws_lo); 3094 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3095 so_base_ws_hi); 3096 } 3097 } 3098 3099 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3100 { 3101 struct gaudi_device *gaudi = hdev->asic_specific; 3102 struct gaudi_internal_qman_info *q; 3103 u64 qman_base_addr; 3104 u32 so_base_hi, tpc_offset = 0; 3105 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3106 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3107 int i, tpc_id, internal_q_index; 3108 3109 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3110 return; 3111 3112 so_base_hi = upper_32_bits(CFG_BASE + 3113 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3114 3115 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3116 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3117 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3118 tpc_id * QMAN_STREAMS + i; 3119 q = &gaudi->internal_qmans[internal_q_index]; 3120 qman_base_addr = (u64) q->pq_dma_addr; 3121 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3122 qman_base_addr); 3123 3124 if (i == 3) { 3125 /* Initializing lower CP for TPC QMAN */ 3126 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3127 3128 /* Enable the QMAN and TPC channel */ 3129 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3130 QMAN_TPC_ENABLE); 3131 } 3132 } 3133 3134 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3135 so_base_hi); 3136 3137 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3138 3139 gaudi->hw_cap_initialized |= 3140 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3141 } 3142 } 3143 3144 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3145 int qman_id, u64 qman_base_addr, int nic_id) 3146 { 3147 
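        /*
         * Summary (descriptive only): program this NIC QMAN stream's PQ base,
         * size and PI/CI, its LDMA offsets, and the monitor/SOB message bases
         * (MSG_BASE0/1 point at the east-north sync manager, MSG_BASE2/3 at
         * the west-south one used for sync stream collective). The RAZWI/
         * error reporting and the arbitration watchdog below are configured
         * only once per QMAN, when qman_id is 0.
         */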
struct cpu_dyn_regs *dyn_regs = 3148 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3149 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3150 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3151 u32 nic_qm_err_cfg, irq_handler_offset; 3152 u32 q_off; 3153 3154 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3155 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3156 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3157 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3158 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3159 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3160 so_base_en_hi = upper_32_bits(CFG_BASE + 3161 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3162 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3163 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3164 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3165 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3166 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3167 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3168 so_base_ws_hi = upper_32_bits(CFG_BASE + 3169 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3170 3171 q_off = nic_offset + qman_id * 4; 3172 3173 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3174 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3175 3176 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3177 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3178 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3179 3180 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3181 QMAN_LDMA_SIZE_OFFSET); 3182 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3183 QMAN_LDMA_SRC_OFFSET); 3184 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3185 QMAN_LDMA_DST_OFFSET); 3186 3187 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3188 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3189 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3190 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3191 3192 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3193 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3194 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3195 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3196 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3197 3198 if (qman_id == 0) { 3199 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3200 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3201 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3202 3203 /* Configure RAZWI IRQ */ 3204 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3205 if (hdev->stop_on_err) 3206 nic_qm_err_cfg |= 3207 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3208 3209 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3210 3211 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3212 lower_32_bits(CFG_BASE + irq_handler_offset)); 3213 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3214 upper_32_bits(CFG_BASE + irq_handler_offset)); 3215 3216 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3217 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3218 nic_id); 3219 3220 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3221 QM_ARB_ERR_MSG_EN_MASK); 3222 3223 /* Set timeout to maximum */ 3224 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3225 3226 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3227 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3228 QMAN_INTERNAL_MAKE_TRUSTED); 3229 } 3230 } 3231 3232 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3233 { 3234 struct gaudi_device *gaudi = hdev->asic_specific; 3235 struct gaudi_internal_qman_info *q; 3236 u64 qman_base_addr; 3237 u32 nic_offset = 0; 3238 u32 nic_delta_between_qmans = 3239 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3240 u32 nic_delta_between_nics = 3241 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3242 int i, nic_id, internal_q_index; 3243 3244 if (!hdev->nic_ports_mask) 3245 return; 3246 3247 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3248 return; 3249 3250 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3251 3252 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3253 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3254 nic_offset += nic_delta_between_qmans; 3255 if (nic_id & 1) { 3256 nic_offset -= (nic_delta_between_qmans * 2); 3257 nic_offset += nic_delta_between_nics; 3258 } 3259 continue; 3260 } 3261 3262 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3263 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3264 nic_id * QMAN_STREAMS + i; 3265 q = &gaudi->internal_qmans[internal_q_index]; 3266 qman_base_addr = (u64) q->pq_dma_addr; 3267 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3268 qman_base_addr, nic_id); 3269 } 3270 3271 /* Enable the QMAN */ 3272 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3273 3274 nic_offset += nic_delta_between_qmans; 3275 if (nic_id & 1) { 3276 nic_offset -= (nic_delta_between_qmans * 2); 3277 nic_offset += nic_delta_between_nics; 3278 } 3279 3280 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3281 } 3282 } 3283 3284 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3285 { 3286 struct gaudi_device *gaudi = hdev->asic_specific; 3287 3288 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3289 return; 3290 3291 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3292 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3293 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3294 } 3295 3296 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3297 { 3298 struct gaudi_device *gaudi = hdev->asic_specific; 3299 3300 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3301 return; 3302 3303 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3304 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3305 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3306 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3307 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3308 } 3309 3310 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3311 { 3312 struct gaudi_device *gaudi = hdev->asic_specific; 3313 3314 if 
(!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3315 return; 3316 3317 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3318 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3319 } 3320 3321 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3322 { 3323 struct gaudi_device *gaudi = hdev->asic_specific; 3324 u32 tpc_offset = 0; 3325 int tpc_id; 3326 3327 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3328 return; 3329 3330 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3331 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3332 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3333 } 3334 } 3335 3336 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3337 { 3338 struct gaudi_device *gaudi = hdev->asic_specific; 3339 u32 nic_mask, nic_offset = 0; 3340 u32 nic_delta_between_qmans = 3341 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3342 u32 nic_delta_between_nics = 3343 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3344 int nic_id; 3345 3346 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3347 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3348 3349 if (gaudi->hw_cap_initialized & nic_mask) 3350 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3351 3352 nic_offset += nic_delta_between_qmans; 3353 if (nic_id & 1) { 3354 nic_offset -= (nic_delta_between_qmans * 2); 3355 nic_offset += nic_delta_between_nics; 3356 } 3357 } 3358 } 3359 3360 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3361 { 3362 struct gaudi_device *gaudi = hdev->asic_specific; 3363 3364 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3365 return; 3366 3367 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3368 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3369 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3370 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3371 } 3372 3373 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3374 { 3375 struct gaudi_device *gaudi = hdev->asic_specific; 3376 3377 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3378 return; 3379 3380 /* Stop CPs of HBM DMA QMANs */ 3381 3382 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3383 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3384 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3385 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3386 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3387 } 3388 3389 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3390 { 3391 struct gaudi_device *gaudi = hdev->asic_specific; 3392 3393 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3394 return; 3395 3396 /* Stop CPs of MME QMANs */ 3397 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3398 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3399 } 3400 3401 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3402 { 3403 struct gaudi_device *gaudi = hdev->asic_specific; 3404 3405 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3406 return; 3407 3408 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3409 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3410 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3411 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3412 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3413 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3414 
WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3415 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3416 } 3417 3418 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3419 { 3420 struct gaudi_device *gaudi = hdev->asic_specific; 3421 3422 /* Stop upper CPs of QMANs */ 3423 3424 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3425 WREG32(mmNIC0_QM0_GLBL_CFG1, 3426 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3427 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3428 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3429 3430 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3431 WREG32(mmNIC0_QM1_GLBL_CFG1, 3432 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3433 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3434 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3435 3436 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3437 WREG32(mmNIC1_QM0_GLBL_CFG1, 3438 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3439 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3440 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3441 3442 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3443 WREG32(mmNIC1_QM1_GLBL_CFG1, 3444 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3445 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3446 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3447 3448 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3449 WREG32(mmNIC2_QM0_GLBL_CFG1, 3450 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3451 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3452 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3453 3454 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3455 WREG32(mmNIC2_QM1_GLBL_CFG1, 3456 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3457 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3458 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3459 3460 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3461 WREG32(mmNIC3_QM0_GLBL_CFG1, 3462 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3463 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3464 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3465 3466 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3467 WREG32(mmNIC3_QM1_GLBL_CFG1, 3468 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3469 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3470 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3471 3472 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3473 WREG32(mmNIC4_QM0_GLBL_CFG1, 3474 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3475 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3476 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3477 3478 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3479 WREG32(mmNIC4_QM1_GLBL_CFG1, 3480 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3481 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3482 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3483 } 3484 3485 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3486 { 3487 struct gaudi_device *gaudi = hdev->asic_specific; 3488 3489 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3490 return; 3491 3492 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3493 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3494 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3495 } 3496 3497 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3498 { 3499 struct gaudi_device *gaudi = hdev->asic_specific; 3500 3501 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3502 return; 3503 3504 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3505 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3506 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3507 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3508 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3509 } 3510 3511 static void gaudi_mme_stall(struct hl_device *hdev) 3512 { 3513 struct gaudi_device *gaudi = hdev->asic_specific; 3514 3515 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3516 return; 3517 
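	/*
	 * Note: this runs in the halt-engines flow after gaudi_stop_mme_qmans()
	 * has already stopped the MME QMAN CPs. Stalling the ACC and SBAB
	 * blocks halts the MME datapath itself, so any in-flight work is
	 * quiesced before the QMANs are disabled later in gaudi_halt_engines().
	 */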
3518 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3519 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3520 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3521 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3522 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3523 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3524 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3525 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3526 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3527 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3528 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3529 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3530 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3531 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3532 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3533 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3534 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3535 } 3536 3537 static void gaudi_tpc_stall(struct hl_device *hdev) 3538 { 3539 struct gaudi_device *gaudi = hdev->asic_specific; 3540 3541 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3542 return; 3543 3544 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3545 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3546 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3547 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3548 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3549 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3550 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3551 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3552 } 3553 3554 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3555 { 3556 u32 qman_offset; 3557 int i; 3558 3559 if (hdev->asic_prop.fw_security_enabled) 3560 return; 3561 3562 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3563 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3564 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3565 3566 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3567 } 3568 3569 WREG32(mmMME0_QM_CGM_CFG, 0); 3570 WREG32(mmMME0_QM_CGM_CFG1, 0); 3571 WREG32(mmMME2_QM_CGM_CFG, 0); 3572 WREG32(mmMME2_QM_CGM_CFG1, 0); 3573 3574 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3575 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3576 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3577 3578 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3579 } 3580 } 3581 3582 static void gaudi_enable_timestamp(struct hl_device *hdev) 3583 { 3584 /* Disable the timestamp counter */ 3585 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3586 3587 /* Zero the lower/upper parts of the 64-bit counter */ 3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3589 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3590 3591 /* Enable the counter */ 3592 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3593 } 3594 3595 static void gaudi_disable_timestamp(struct hl_device *hdev) 3596 { 3597 /* Disable the timestamp counter */ 3598 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3599 } 3600 3601 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3602 { 3603 u32 wait_timeout_ms; 3604 3605 if (hdev->pldm) 3606 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3607 else 3608 
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3609 3610 if (fw_reset) 3611 goto skip_engines; 3612 3613 gaudi_stop_nic_qmans(hdev); 3614 gaudi_stop_mme_qmans(hdev); 3615 gaudi_stop_tpc_qmans(hdev); 3616 gaudi_stop_hbm_dma_qmans(hdev); 3617 gaudi_stop_pci_dma_qmans(hdev); 3618 3619 msleep(wait_timeout_ms); 3620 3621 gaudi_pci_dma_stall(hdev); 3622 gaudi_hbm_dma_stall(hdev); 3623 gaudi_tpc_stall(hdev); 3624 gaudi_mme_stall(hdev); 3625 3626 msleep(wait_timeout_ms); 3627 3628 gaudi_disable_nic_qmans(hdev); 3629 gaudi_disable_mme_qmans(hdev); 3630 gaudi_disable_tpc_qmans(hdev); 3631 gaudi_disable_hbm_dma_qmans(hdev); 3632 gaudi_disable_pci_dma_qmans(hdev); 3633 3634 gaudi_disable_timestamp(hdev); 3635 3636 skip_engines: 3637 gaudi_disable_msi(hdev); 3638 } 3639 3640 static int gaudi_mmu_init(struct hl_device *hdev) 3641 { 3642 struct asic_fixed_properties *prop = &hdev->asic_prop; 3643 struct gaudi_device *gaudi = hdev->asic_specific; 3644 u64 hop0_addr; 3645 int rc, i; 3646 3647 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3648 return 0; 3649 3650 for (i = 0 ; i < prop->max_asid ; i++) { 3651 hop0_addr = prop->mmu_pgt_addr + 3652 (i * prop->dmmu.hop_table_size); 3653 3654 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3655 if (rc) { 3656 dev_err(hdev->dev, 3657 "failed to set hop0 addr for asid %d\n", i); 3658 return rc; 3659 } 3660 } 3661 3662 /* init MMU cache manage page */ 3663 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3664 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3665 3666 /* mem cache invalidation */ 3667 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3668 3669 rc = hl_mmu_invalidate_cache(hdev, true, 0); 3670 if (rc) 3671 return rc; 3672 3673 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3674 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3675 3676 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3677 3678 /* 3679 * The H/W expects the first PI after init to be 1. After wraparound 3680 * we'll write 0. 
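	 * gaudi->mmu_cache_inv_pi below is the driver-side copy of that
	 * producer index, used for the MMU cache-invalidation requests the
	 * driver issues later on.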
3681 */ 3682 gaudi->mmu_cache_inv_pi = 1; 3683 3684 gaudi->hw_cap_initialized |= HW_CAP_MMU; 3685 3686 return 0; 3687 } 3688 3689 static int gaudi_load_firmware_to_device(struct hl_device *hdev) 3690 { 3691 void __iomem *dst; 3692 3693 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 3694 3695 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0); 3696 } 3697 3698 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 3699 { 3700 void __iomem *dst; 3701 3702 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 3703 3704 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); 3705 } 3706 3707 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev) 3708 { 3709 struct dynamic_fw_load_mgr *dynamic_loader; 3710 struct cpu_dyn_regs *dyn_regs; 3711 3712 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3713 3714 /* 3715 * here we update initial values for few specific dynamic regs (as 3716 * before reading the first descriptor from FW those value has to be 3717 * hard-coded) in later stages of the protocol those values will be 3718 * updated automatically by reading the FW descriptor so data there 3719 * will always be up-to-date 3720 */ 3721 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3722 dyn_regs->kmd_msg_to_cpu = 3723 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3724 dyn_regs->cpu_cmd_status_to_host = 3725 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3726 3727 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC; 3728 } 3729 3730 static void gaudi_init_static_firmware_loader(struct hl_device *hdev) 3731 { 3732 struct static_fw_load_mgr *static_loader; 3733 3734 static_loader = &hdev->fw_loader.static_loader; 3735 3736 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3737 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3738 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3739 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3740 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3741 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3742 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3743 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3744 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3745 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3746 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3747 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3748 static_loader->cpu_reset_wait_msec = hdev->pldm ? 
3749 GAUDI_PLDM_RESET_WAIT_MSEC : 3750 GAUDI_CPU_RESET_WAIT_MSEC; 3751 } 3752 3753 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3754 { 3755 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3756 3757 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3758 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3759 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3760 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3761 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3762 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3763 } 3764 3765 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3766 { 3767 struct asic_fixed_properties *prop = &hdev->asic_prop; 3768 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3769 3770 /* fill common fields */ 3771 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3772 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3773 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3774 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3775 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3776 fw_loader->skip_bmc = !hdev->bmc_enable; 3777 fw_loader->sram_bar_id = SRAM_BAR_ID; 3778 fw_loader->dram_bar_id = HBM_BAR_ID; 3779 3780 if (prop->dynamic_fw_load) 3781 gaudi_init_dynamic_firmware_loader(hdev); 3782 else 3783 gaudi_init_static_firmware_loader(hdev); 3784 } 3785 3786 static int gaudi_init_cpu(struct hl_device *hdev) 3787 { 3788 struct gaudi_device *gaudi = hdev->asic_specific; 3789 int rc; 3790 3791 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3792 return 0; 3793 3794 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3795 return 0; 3796 3797 /* 3798 * The device CPU works with 40 bits addresses. 3799 * This register sets the extension to 50 bits. 
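	 * The extra MSB bits are taken from hdev->cpu_pci_msb_addr, which the
	 * driver prepares earlier during initialization. When firmware
	 * security is enabled the register is presumably owned by the
	 * firmware, so the write below is skipped.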
3800 */ 3801 if (!hdev->asic_prop.fw_security_enabled) 3802 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3803 3804 rc = hl_fw_init_cpu(hdev); 3805 3806 if (rc) 3807 return rc; 3808 3809 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3810 3811 return 0; 3812 } 3813 3814 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3815 { 3816 struct cpu_dyn_regs *dyn_regs = 3817 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3818 struct asic_fixed_properties *prop = &hdev->asic_prop; 3819 struct gaudi_device *gaudi = hdev->asic_specific; 3820 u32 status, irq_handler_offset; 3821 struct hl_eq *eq; 3822 struct hl_hw_queue *cpu_pq = 3823 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3824 int err; 3825 3826 if (!hdev->cpu_queues_enable) 3827 return 0; 3828 3829 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3830 return 0; 3831 3832 eq = &hdev->event_queue; 3833 3834 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3835 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3836 3837 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3838 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3839 3840 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3841 lower_32_bits(hdev->cpu_accessible_dma_address)); 3842 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3843 upper_32_bits(hdev->cpu_accessible_dma_address)); 3844 3845 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3846 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3847 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3848 3849 /* Used for EQ CI */ 3850 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3851 3852 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3853 3854 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3855 3856 irq_handler_offset = prop->gic_interrupts_enable ? 3857 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3858 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3859 3860 WREG32(irq_handler_offset, 3861 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3862 3863 err = hl_poll_timeout( 3864 hdev, 3865 mmCPU_IF_QUEUE_INIT, 3866 status, 3867 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3868 1000, 3869 cpu_timeout); 3870 3871 if (err) { 3872 dev_err(hdev->dev, 3873 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3874 return -EIO; 3875 } 3876 3877 /* update FW application security bits */ 3878 if (prop->fw_cpu_boot_dev_sts0_valid) 3879 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3880 if (prop->fw_cpu_boot_dev_sts1_valid) 3881 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3882 3883 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3884 return 0; 3885 } 3886 3887 static void gaudi_pre_hw_init(struct hl_device *hdev) 3888 { 3889 /* Perform read from the device to make sure device is up */ 3890 RREG32(mmHW_STATE); 3891 3892 if (!hdev->asic_prop.fw_security_enabled) { 3893 /* Set the access through PCI bars (Linux driver only) as 3894 * secured 3895 */ 3896 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3897 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3898 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3899 3900 /* Perform read to flush the waiting writes to ensure 3901 * configuration was set in the device 3902 */ 3903 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3904 } 3905 3906 /* 3907 * Let's mark in the H/W that we have reached this point. We check 3908 * this value in the reset_before_init function to understand whether 3909 * we need to reset the chip before doing H/W init. 
This register is 3910 * cleared by the H/W upon H/W reset 3911 */ 3912 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3913 } 3914 3915 static int gaudi_hw_init(struct hl_device *hdev) 3916 { 3917 struct gaudi_device *gaudi = hdev->asic_specific; 3918 int rc; 3919 3920 gaudi_pre_hw_init(hdev); 3921 3922 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3923 * So we set it here and if anyone tries to move it later to 3924 * a different address, there will be an error 3925 */ 3926 if (hdev->asic_prop.iatu_done_by_fw) 3927 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3928 3929 /* 3930 * Before pushing u-boot/linux to device, need to set the hbm bar to 3931 * base address of dram 3932 */ 3933 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 3934 dev_err(hdev->dev, 3935 "failed to map HBM bar to DRAM base address\n"); 3936 return -EIO; 3937 } 3938 3939 rc = gaudi_init_cpu(hdev); 3940 if (rc) { 3941 dev_err(hdev->dev, "failed to initialize CPU\n"); 3942 return rc; 3943 } 3944 3945 /* In case the clock gating was enabled in preboot we need to disable 3946 * it here before touching the MME/TPC registers. 3947 */ 3948 gaudi_disable_clock_gating(hdev); 3949 3950 /* SRAM scrambler must be initialized after CPU is running from HBM */ 3951 gaudi_init_scrambler_sram(hdev); 3952 3953 /* This is here just in case we are working without CPU */ 3954 gaudi_init_scrambler_hbm(hdev); 3955 3956 gaudi_init_golden_registers(hdev); 3957 3958 rc = gaudi_mmu_init(hdev); 3959 if (rc) 3960 return rc; 3961 3962 gaudi_init_security(hdev); 3963 3964 gaudi_init_pci_dma_qmans(hdev); 3965 3966 gaudi_init_hbm_dma_qmans(hdev); 3967 3968 gaudi_init_mme_qmans(hdev); 3969 3970 gaudi_init_tpc_qmans(hdev); 3971 3972 gaudi_init_nic_qmans(hdev); 3973 3974 gaudi_enable_timestamp(hdev); 3975 3976 /* MSI must be enabled before CPU queues and NIC are initialized */ 3977 rc = gaudi_enable_msi(hdev); 3978 if (rc) 3979 goto disable_queues; 3980 3981 /* must be called after MSI was enabled */ 3982 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 3983 if (rc) { 3984 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 3985 rc); 3986 goto disable_msi; 3987 } 3988 3989 /* Perform read from the device to flush all configuration */ 3990 RREG32(mmHW_STATE); 3991 3992 return 0; 3993 3994 disable_msi: 3995 gaudi_disable_msi(hdev); 3996 disable_queues: 3997 gaudi_disable_mme_qmans(hdev); 3998 gaudi_disable_pci_dma_qmans(hdev); 3999 4000 return rc; 4001 } 4002 4003 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4004 { 4005 struct cpu_dyn_regs *dyn_regs = 4006 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4007 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4008 struct gaudi_device *gaudi = hdev->asic_specific; 4009 bool driver_performs_reset; 4010 4011 if (!hard_reset) { 4012 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4013 return 0; 4014 } 4015 4016 if (hdev->pldm) { 4017 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4018 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4019 } else { 4020 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4021 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4022 } 4023 4024 if (fw_reset) { 4025 dev_dbg(hdev->dev, 4026 "Firmware performs HARD reset, going to wait %dms\n", 4027 reset_timeout_ms); 4028 4029 goto skip_reset; 4030 } 4031 4032 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4033 !hdev->asic_prop.hard_reset_done_by_fw); 4034 4035 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4036 * halt mode 4037 */ 4038 if (driver_performs_reset) 4039 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4040 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4041 4042 /* If linux is loaded in the device CPU we need to communicate with it 4043 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4044 * registers in case of old F/Ws 4045 */ 4046 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4047 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4048 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4049 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4050 4051 WREG32(irq_handler_offset, 4052 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4053 4054 /* This is a hail-mary attempt to revive the card in the small chance that the 4055 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4056 * In that case, triggering reset through GIC won't help. We need to trigger the 4057 * reset as if Linux wasn't loaded. 4058 * 4059 * We do it only if the reset cause was HB, because that would be the indication 4060 * of such an event. 4061 * 4062 * In case watchdog hasn't expired but we still got HB, then this won't do any 4063 * damage. 4064 */ 4065 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4066 if (hdev->asic_prop.hard_reset_done_by_fw) 4067 hl_fw_ask_hard_reset_without_linux(hdev); 4068 else 4069 hl_fw_ask_halt_machine_without_linux(hdev); 4070 } 4071 } else { 4072 if (hdev->asic_prop.hard_reset_done_by_fw) 4073 hl_fw_ask_hard_reset_without_linux(hdev); 4074 else 4075 hl_fw_ask_halt_machine_without_linux(hdev); 4076 } 4077 4078 if (driver_performs_reset) { 4079 4080 /* Configure the reset registers. Must be done as early as 4081 * possible in case we fail during H/W initialization 4082 */ 4083 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4084 (CFG_RST_H_DMA_MASK | 4085 CFG_RST_H_MME_MASK | 4086 CFG_RST_H_SM_MASK | 4087 CFG_RST_H_TPC_7_MASK)); 4088 4089 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4090 4091 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4092 (CFG_RST_H_HBM_MASK | 4093 CFG_RST_H_TPC_7_MASK | 4094 CFG_RST_H_NIC_MASK | 4095 CFG_RST_H_SM_MASK | 4096 CFG_RST_H_DMA_MASK | 4097 CFG_RST_H_MME_MASK | 4098 CFG_RST_H_CPU_MASK | 4099 CFG_RST_H_MMU_MASK)); 4100 4101 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4102 (CFG_RST_L_IF_MASK | 4103 CFG_RST_L_PSOC_MASK | 4104 CFG_RST_L_TPC_MASK)); 4105 4106 msleep(cpu_timeout_ms); 4107 4108 /* Tell ASIC not to re-initialize PCIe */ 4109 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4110 4111 /* Restart BTL/BLR upon hard-reset */ 4112 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4113 4114 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4115 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4116 4117 dev_dbg(hdev->dev, 4118 "Issued HARD reset command, going to wait %dms\n", 4119 reset_timeout_ms); 4120 } else { 4121 dev_dbg(hdev->dev, 4122 "Firmware performs HARD reset, going to wait %dms\n", 4123 reset_timeout_ms); 4124 } 4125 4126 skip_reset: 4127 /* 4128 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4129 * itself is in reset. 
Need to wait until the reset is deasserted 4130 */ 4131 msleep(reset_timeout_ms); 4132 4133 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4134 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { 4135 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); 4136 return -ETIMEDOUT; 4137 } 4138 4139 if (gaudi) { 4140 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4141 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4142 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4143 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4144 HW_CAP_HBM_SCRAMBLER); 4145 4146 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4147 4148 hdev->device_cpu_is_halted = false; 4149 } 4150 return 0; 4151 } 4152 4153 static int gaudi_suspend(struct hl_device *hdev) 4154 { 4155 return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4156 } 4157 4158 static int gaudi_resume(struct hl_device *hdev) 4159 { 4160 return gaudi_init_iatu(hdev); 4161 } 4162 4163 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4164 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4165 { 4166 int rc; 4167 4168 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4169 VM_DONTCOPY | VM_NORESERVE); 4170 4171 #ifdef _HAS_DMA_MMAP_COHERENT 4172 /* 4173 * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP 4174 * so vm_insert_page() can handle it safely. Without this, the kernel 4175 * may BUG_ON due to VM_PFNMAP. 4176 */ 4177 if (is_vmalloc_addr(cpu_addr)) 4178 vm_flags_set(vma, VM_MIXEDMAP); 4179 4180 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4181 (dma_addr - HOST_PHYS_BASE), size); 4182 if (rc) 4183 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4184 #else 4185 4186 rc = remap_pfn_range(vma, vma->vm_start, 4187 virt_to_phys(cpu_addr) >> PAGE_SHIFT, 4188 size, vma->vm_page_prot); 4189 if (rc) 4190 dev_err(hdev->dev, "remap_pfn_range error %d", rc); 4191 4192 #endif 4193 4194 4195 return rc; 4196 } 4197 4198 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4199 { 4200 struct cpu_dyn_regs *dyn_regs = 4201 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4202 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4203 struct gaudi_device *gaudi = hdev->asic_specific; 4204 bool invalid_queue = false; 4205 int dma_id; 4206 4207 switch (hw_queue_id) { 4208 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4209 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4210 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4211 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4212 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4213 break; 4214 4215 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4216 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4217 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4218 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4219 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4220 break; 4221 4222 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4223 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4224 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4225 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4226 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4227 break; 4228 4229 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4230 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4231 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4232 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4233 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4234 break; 4235 4236 case 
GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4237 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4238 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4239 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4240 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4241 break; 4242 4243 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4244 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4245 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4246 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4247 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4248 break; 4249 4250 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4251 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4252 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4253 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4254 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4255 break; 4256 4257 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4258 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4259 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4260 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4261 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4262 break; 4263 4264 case GAUDI_QUEUE_ID_CPU_PQ: 4265 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4266 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4267 else 4268 invalid_queue = true; 4269 break; 4270 4271 case GAUDI_QUEUE_ID_MME_0_0: 4272 db_reg_offset = mmMME2_QM_PQ_PI_0; 4273 break; 4274 4275 case GAUDI_QUEUE_ID_MME_0_1: 4276 db_reg_offset = mmMME2_QM_PQ_PI_1; 4277 break; 4278 4279 case GAUDI_QUEUE_ID_MME_0_2: 4280 db_reg_offset = mmMME2_QM_PQ_PI_2; 4281 break; 4282 4283 case GAUDI_QUEUE_ID_MME_0_3: 4284 db_reg_offset = mmMME2_QM_PQ_PI_3; 4285 break; 4286 4287 case GAUDI_QUEUE_ID_MME_1_0: 4288 db_reg_offset = mmMME0_QM_PQ_PI_0; 4289 break; 4290 4291 case GAUDI_QUEUE_ID_MME_1_1: 4292 db_reg_offset = mmMME0_QM_PQ_PI_1; 4293 break; 4294 4295 case GAUDI_QUEUE_ID_MME_1_2: 4296 db_reg_offset = mmMME0_QM_PQ_PI_2; 4297 break; 4298 4299 case GAUDI_QUEUE_ID_MME_1_3: 4300 db_reg_offset = mmMME0_QM_PQ_PI_3; 4301 break; 4302 4303 case GAUDI_QUEUE_ID_TPC_0_0: 4304 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4305 break; 4306 4307 case GAUDI_QUEUE_ID_TPC_0_1: 4308 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4309 break; 4310 4311 case GAUDI_QUEUE_ID_TPC_0_2: 4312 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4313 break; 4314 4315 case GAUDI_QUEUE_ID_TPC_0_3: 4316 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4317 break; 4318 4319 case GAUDI_QUEUE_ID_TPC_1_0: 4320 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4321 break; 4322 4323 case GAUDI_QUEUE_ID_TPC_1_1: 4324 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4325 break; 4326 4327 case GAUDI_QUEUE_ID_TPC_1_2: 4328 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4329 break; 4330 4331 case GAUDI_QUEUE_ID_TPC_1_3: 4332 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4333 break; 4334 4335 case GAUDI_QUEUE_ID_TPC_2_0: 4336 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4337 break; 4338 4339 case GAUDI_QUEUE_ID_TPC_2_1: 4340 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4341 break; 4342 4343 case GAUDI_QUEUE_ID_TPC_2_2: 4344 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4345 break; 4346 4347 case GAUDI_QUEUE_ID_TPC_2_3: 4348 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4349 break; 4350 4351 case GAUDI_QUEUE_ID_TPC_3_0: 4352 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4353 break; 4354 4355 case GAUDI_QUEUE_ID_TPC_3_1: 4356 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4357 break; 4358 4359 case GAUDI_QUEUE_ID_TPC_3_2: 4360 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4361 break; 4362 4363 case GAUDI_QUEUE_ID_TPC_3_3: 4364 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4365 break; 4366 4367 case GAUDI_QUEUE_ID_TPC_4_0: 4368 db_reg_offset = 
mmTPC4_QM_PQ_PI_0; 4369 break; 4370 4371 case GAUDI_QUEUE_ID_TPC_4_1: 4372 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4373 break; 4374 4375 case GAUDI_QUEUE_ID_TPC_4_2: 4376 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4377 break; 4378 4379 case GAUDI_QUEUE_ID_TPC_4_3: 4380 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4381 break; 4382 4383 case GAUDI_QUEUE_ID_TPC_5_0: 4384 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4385 break; 4386 4387 case GAUDI_QUEUE_ID_TPC_5_1: 4388 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4389 break; 4390 4391 case GAUDI_QUEUE_ID_TPC_5_2: 4392 db_reg_offset = mmTPC5_QM_PQ_PI_2; 4393 break; 4394 4395 case GAUDI_QUEUE_ID_TPC_5_3: 4396 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4397 break; 4398 4399 case GAUDI_QUEUE_ID_TPC_6_0: 4400 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4401 break; 4402 4403 case GAUDI_QUEUE_ID_TPC_6_1: 4404 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4405 break; 4406 4407 case GAUDI_QUEUE_ID_TPC_6_2: 4408 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4409 break; 4410 4411 case GAUDI_QUEUE_ID_TPC_6_3: 4412 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4413 break; 4414 4415 case GAUDI_QUEUE_ID_TPC_7_0: 4416 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4417 break; 4418 4419 case GAUDI_QUEUE_ID_TPC_7_1: 4420 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4421 break; 4422 4423 case GAUDI_QUEUE_ID_TPC_7_2: 4424 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4425 break; 4426 4427 case GAUDI_QUEUE_ID_TPC_7_3: 4428 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4429 break; 4430 4431 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4432 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4433 invalid_queue = true; 4434 4435 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4436 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4437 break; 4438 4439 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4440 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4441 invalid_queue = true; 4442 4443 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4444 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4445 break; 4446 4447 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4448 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4449 invalid_queue = true; 4450 4451 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4452 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4453 break; 4454 4455 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4456 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4457 invalid_queue = true; 4458 4459 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4460 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4461 break; 4462 4463 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4464 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4465 invalid_queue = true; 4466 4467 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4468 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4469 break; 4470 4471 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4472 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4473 invalid_queue = true; 4474 4475 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4476 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4477 break; 4478 4479 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4480 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4481 invalid_queue = true; 4482 4483 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4484 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4485 break; 4486 4487 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4488 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4489 invalid_queue = true; 4490 4491 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4492 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4493 break; 4494 4495 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4496 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4497 
invalid_queue = true; 4498 4499 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4500 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4501 break; 4502 4503 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4504 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4505 invalid_queue = true; 4506 4507 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4508 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4509 break; 4510 4511 default: 4512 invalid_queue = true; 4513 } 4514 4515 if (invalid_queue) { 4516 /* Should never get here */ 4517 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n", 4518 hw_queue_id); 4519 return; 4520 } 4521 4522 db_value = pi; 4523 4524 /* ring the doorbell */ 4525 WREG32(db_reg_offset, db_value); 4526 4527 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4528 /* make sure device CPU will read latest data from host */ 4529 mb(); 4530 4531 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4532 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4533 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4534 4535 WREG32(irq_handler_offset, 4536 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4537 } 4538 } 4539 4540 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4541 struct hl_bd *bd) 4542 { 4543 __le64 *pbd = (__le64 *) bd; 4544 4545 /* The QMANs are on the host memory so a simple copy suffice */ 4546 pqe[0] = pbd[0]; 4547 pqe[1] = pbd[1]; 4548 } 4549 4550 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4551 dma_addr_t *dma_handle, gfp_t flags) 4552 { 4553 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4554 dma_handle, flags); 4555 4556 /* Shift to the device's base physical address of host memory */ 4557 if (kernel_addr) 4558 *dma_handle += HOST_PHYS_BASE; 4559 4560 return kernel_addr; 4561 } 4562 4563 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4564 void *cpu_addr, dma_addr_t dma_handle) 4565 { 4566 /* Cancel the device's base physical address of host memory */ 4567 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4568 4569 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4570 } 4571 4572 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4573 { 4574 struct asic_fixed_properties *prop = &hdev->asic_prop; 4575 u64 cur_addr = prop->dram_user_base_address; 4576 u32 chunk_size, busy; 4577 int rc, dma_id; 4578 4579 while (cur_addr < prop->dram_end_address) { 4580 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4581 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4582 4583 chunk_size = 4584 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4585 4586 dev_dbg(hdev->dev, 4587 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4588 cur_addr, cur_addr + chunk_size); 4589 4590 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4591 lower_32_bits(val)); 4592 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4593 upper_32_bits(val)); 4594 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4595 lower_32_bits(cur_addr)); 4596 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4597 upper_32_bits(cur_addr)); 4598 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4599 chunk_size); 4600 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4601 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4602 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4603 4604 cur_addr += chunk_size; 4605 4606 if (cur_addr == prop->dram_end_address) 4607 break; 4608 } 4609 4610 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4611 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4612 4613 rc = hl_poll_timeout( 4614 hdev, 4615 mmDMA0_CORE_STS0 + 
dma_offset, 4616 busy, 4617 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4618 1000, 4619 HBM_SCRUBBING_TIMEOUT_US); 4620 4621 if (rc) { 4622 dev_err(hdev->dev, 4623 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4624 dma_id); 4625 return -EIO; 4626 } 4627 } 4628 } 4629 4630 return 0; 4631 } 4632 4633 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4634 { 4635 struct asic_fixed_properties *prop = &hdev->asic_prop; 4636 u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US; 4637 u64 addr, size, val = hdev->memory_scrub_val; 4638 ktime_t timeout; 4639 int rc = 0; 4640 4641 if (!hdev->memory_scrub) 4642 return 0; 4643 4644 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4645 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4646 if (ktime_compare(ktime_get(), timeout) > 0) { 4647 dev_err(hdev->dev, "waiting for idle timeout\n"); 4648 return -ETIMEDOUT; 4649 } 4650 usleep_range((1000 >> 2) + 1, 1000); 4651 } 4652 4653 /* Scrub SRAM */ 4654 addr = prop->sram_user_base_address; 4655 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4656 4657 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4658 addr, addr + size, val); 4659 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4660 if (rc) { 4661 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4662 return rc; 4663 } 4664 4665 /* Scrub HBM using all DMA channels in parallel */ 4666 rc = gaudi_scrub_device_dram(hdev, val); 4667 if (rc) { 4668 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4669 return rc; 4670 } 4671 4672 return 0; 4673 } 4674 4675 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4676 u32 queue_id, dma_addr_t *dma_handle, 4677 u16 *queue_len) 4678 { 4679 struct gaudi_device *gaudi = hdev->asic_specific; 4680 struct gaudi_internal_qman_info *q; 4681 4682 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4683 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4684 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4685 return NULL; 4686 } 4687 4688 q = &gaudi->internal_qmans[queue_id]; 4689 *dma_handle = q->pq_dma_addr; 4690 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4691 4692 return q->pq_kernel_addr; 4693 } 4694 4695 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4696 u16 len, u32 timeout, u64 *result) 4697 { 4698 struct gaudi_device *gaudi = hdev->asic_specific; 4699 4700 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4701 if (result) 4702 *result = 0; 4703 return 0; 4704 } 4705 4706 if (!timeout) 4707 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4708 4709 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4710 timeout, result); 4711 } 4712 4713 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4714 { 4715 struct packet_msg_prot *fence_pkt; 4716 dma_addr_t pkt_dma_addr; 4717 u32 fence_val, tmp, timeout_usec; 4718 dma_addr_t fence_dma_addr; 4719 u32 *fence_ptr; 4720 int rc; 4721 4722 if (hdev->pldm) 4723 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4724 else 4725 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4726 4727 fence_val = GAUDI_QMAN0_FENCE_VAL; 4728 4729 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4730 if (!fence_ptr) { 4731 dev_err(hdev->dev, 4732 "Failed to allocate memory for H/W queue %d testing\n", 4733 hw_queue_id); 4734 return -ENOMEM; 4735 } 4736 4737 *fence_ptr = 0; 4738 4739 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4740 &pkt_dma_addr); 4741 if (!fence_pkt) { 4742 dev_err(hdev->dev, 4743 
"Failed to allocate packet for H/W queue %d testing\n", 4744 hw_queue_id); 4745 rc = -ENOMEM; 4746 goto free_fence_ptr; 4747 } 4748 4749 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4750 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4751 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4752 4753 fence_pkt->ctl = cpu_to_le32(tmp); 4754 fence_pkt->value = cpu_to_le32(fence_val); 4755 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4756 4757 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4758 sizeof(struct packet_msg_prot), 4759 pkt_dma_addr); 4760 if (rc) { 4761 dev_err(hdev->dev, 4762 "Failed to send fence packet to H/W queue %d\n", 4763 hw_queue_id); 4764 goto free_pkt; 4765 } 4766 4767 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4768 1000, timeout_usec, true); 4769 4770 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4771 4772 if (rc == -ETIMEDOUT) { 4773 dev_err(hdev->dev, 4774 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4775 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4776 rc = -EIO; 4777 } 4778 4779 free_pkt: 4780 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4781 free_fence_ptr: 4782 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4783 return rc; 4784 } 4785 4786 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4787 { 4788 struct gaudi_device *gaudi = hdev->asic_specific; 4789 4790 /* 4791 * check capability here as send_cpu_message() won't update the result 4792 * value if no capability 4793 */ 4794 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4795 return 0; 4796 4797 return hl_fw_test_cpu_queue(hdev); 4798 } 4799 4800 static int gaudi_test_queues(struct hl_device *hdev) 4801 { 4802 int i, rc, ret_val = 0; 4803 4804 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4805 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4806 rc = gaudi_test_queue(hdev, i); 4807 if (rc) 4808 ret_val = -EINVAL; 4809 } 4810 } 4811 4812 rc = gaudi_test_cpu_queue(hdev); 4813 if (rc) 4814 ret_val = -EINVAL; 4815 4816 return ret_val; 4817 } 4818 4819 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4820 gfp_t mem_flags, dma_addr_t *dma_handle) 4821 { 4822 void *kernel_addr; 4823 4824 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4825 return NULL; 4826 4827 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4828 4829 /* Shift to the device's base physical address of host memory */ 4830 if (kernel_addr) 4831 *dma_handle += HOST_PHYS_BASE; 4832 4833 return kernel_addr; 4834 } 4835 4836 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4837 dma_addr_t dma_addr) 4838 { 4839 /* Cancel the device's base physical address of host memory */ 4840 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4841 4842 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4843 } 4844 4845 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4846 size_t size, dma_addr_t *dma_handle) 4847 { 4848 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4849 } 4850 4851 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4852 size_t size, void *vaddr) 4853 { 4854 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4855 } 4856 4857 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4858 { 4859 struct scatterlist *sg, *sg_next_iter; 4860 u32 count, dma_desc_cnt; 4861 u64 len, len_next; 4862 dma_addr_t addr, addr_next; 4863 4864 dma_desc_cnt = 0; 4865 4866 
for_each_sgtable_dma_sg(sgt, sg, count) { 4867 len = sg_dma_len(sg); 4868 addr = sg_dma_address(sg); 4869 4870 if (len == 0) 4871 break; 4872 4873 while ((count + 1) < sgt->nents) { 4874 sg_next_iter = sg_next(sg); 4875 len_next = sg_dma_len(sg_next_iter); 4876 addr_next = sg_dma_address(sg_next_iter); 4877 4878 if (len_next == 0) 4879 break; 4880 4881 if ((addr + len == addr_next) && 4882 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4883 len += len_next; 4884 count++; 4885 sg = sg_next_iter; 4886 } else { 4887 break; 4888 } 4889 } 4890 4891 dma_desc_cnt++; 4892 } 4893 4894 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4895 } 4896 4897 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4898 struct hl_cs_parser *parser, 4899 struct packet_lin_dma *user_dma_pkt, 4900 u64 addr, enum dma_data_direction dir) 4901 { 4902 struct hl_userptr *userptr; 4903 int rc; 4904 4905 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4906 parser->job_userptr_list, &userptr)) 4907 goto already_pinned; 4908 4909 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4910 if (!userptr) 4911 return -ENOMEM; 4912 4913 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4914 userptr); 4915 if (rc) 4916 goto free_userptr; 4917 4918 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4919 4920 rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); 4921 if (rc) { 4922 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4923 goto unpin_memory; 4924 } 4925 4926 userptr->dma_mapped = true; 4927 userptr->dir = dir; 4928 4929 already_pinned: 4930 parser->patched_cb_size += 4931 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4932 4933 return 0; 4934 4935 unpin_memory: 4936 list_del(&userptr->job_node); 4937 hl_unpin_host_memory(hdev, userptr); 4938 free_userptr: 4939 kfree(userptr); 4940 return rc; 4941 } 4942 4943 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4944 struct hl_cs_parser *parser, 4945 struct packet_lin_dma *user_dma_pkt, 4946 bool src_in_host) 4947 { 4948 enum dma_data_direction dir; 4949 bool skip_host_mem_pin = false, user_memset; 4950 u64 addr; 4951 int rc = 0; 4952 4953 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 4954 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 4955 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 4956 4957 if (src_in_host) { 4958 if (user_memset) 4959 skip_host_mem_pin = true; 4960 4961 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 4962 dir = DMA_TO_DEVICE; 4963 addr = le64_to_cpu(user_dma_pkt->src_addr); 4964 } else { 4965 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 4966 dir = DMA_FROM_DEVICE; 4967 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4968 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4969 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4970 } 4971 4972 if (skip_host_mem_pin) 4973 parser->patched_cb_size += sizeof(*user_dma_pkt); 4974 else 4975 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 4976 addr, dir); 4977 4978 return rc; 4979 } 4980 4981 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 4982 struct hl_cs_parser *parser, 4983 struct packet_lin_dma *user_dma_pkt) 4984 { 4985 bool src_in_host = false; 4986 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4987 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4988 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4989 4990 dev_dbg(hdev->dev, "DMA packet details:\n"); 4991 dev_dbg(hdev->dev, "source == 0x%llx\n", 4992 le64_to_cpu(user_dma_pkt->src_addr)); 4993 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 4994 dev_dbg(hdev->dev, "size == %u\n", 
le32_to_cpu(user_dma_pkt->tsize)); 4995 4996 /* 4997 * Special handling for DMA with size 0. Bypass all validations 4998 * because no transactions will be done except for WR_COMP, which 4999 * is not a security issue 5000 */ 5001 if (!le32_to_cpu(user_dma_pkt->tsize)) { 5002 parser->patched_cb_size += sizeof(*user_dma_pkt); 5003 return 0; 5004 } 5005 5006 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5007 src_in_host = true; 5008 5009 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 5010 src_in_host); 5011 } 5012 5013 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 5014 struct hl_cs_parser *parser, 5015 struct packet_load_and_exe *user_pkt) 5016 { 5017 u32 cfg; 5018 5019 cfg = le32_to_cpu(user_pkt->cfg); 5020 5021 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5022 dev_err(hdev->dev, 5023 "User not allowed to use Load and Execute\n"); 5024 return -EPERM; 5025 } 5026 5027 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5028 5029 return 0; 5030 } 5031 5032 static int gaudi_validate_cb(struct hl_device *hdev, 5033 struct hl_cs_parser *parser, bool is_mmu) 5034 { 5035 u32 cb_parsed_length = 0; 5036 int rc = 0; 5037 5038 parser->patched_cb_size = 0; 5039 5040 /* cb_user_size is more than 0 so loop will always be executed */ 5041 while (cb_parsed_length < parser->user_cb_size) { 5042 enum packet_id pkt_id; 5043 u16 pkt_size; 5044 struct gaudi_packet *user_pkt; 5045 5046 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5047 5048 pkt_id = (enum packet_id) ( 5049 (le64_to_cpu(user_pkt->header) & 5050 PACKET_HEADER_PACKET_ID_MASK) >> 5051 PACKET_HEADER_PACKET_ID_SHIFT); 5052 5053 if (!validate_packet_id(pkt_id)) { 5054 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5055 rc = -EINVAL; 5056 break; 5057 } 5058 5059 pkt_size = gaudi_packet_sizes[pkt_id]; 5060 cb_parsed_length += pkt_size; 5061 if (cb_parsed_length > parser->user_cb_size) { 5062 dev_err(hdev->dev, 5063 "packet 0x%x is out of CB boundary\n", pkt_id); 5064 rc = -EINVAL; 5065 break; 5066 } 5067 5068 switch (pkt_id) { 5069 case PACKET_MSG_PROT: 5070 dev_err(hdev->dev, 5071 "User not allowed to use MSG_PROT\n"); 5072 rc = -EPERM; 5073 break; 5074 5075 case PACKET_CP_DMA: 5076 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5077 rc = -EPERM; 5078 break; 5079 5080 case PACKET_STOP: 5081 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5082 rc = -EPERM; 5083 break; 5084 5085 case PACKET_WREG_BULK: 5086 dev_err(hdev->dev, 5087 "User not allowed to use WREG_BULK\n"); 5088 rc = -EPERM; 5089 break; 5090 5091 case PACKET_LOAD_AND_EXE: 5092 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5093 (struct packet_load_and_exe *) user_pkt); 5094 break; 5095 5096 case PACKET_LIN_DMA: 5097 parser->contains_dma_pkt = true; 5098 if (is_mmu) 5099 parser->patched_cb_size += pkt_size; 5100 else 5101 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5102 (struct packet_lin_dma *) user_pkt); 5103 break; 5104 5105 case PACKET_WREG_32: 5106 case PACKET_MSG_LONG: 5107 case PACKET_MSG_SHORT: 5108 case PACKET_REPEAT: 5109 case PACKET_FENCE: 5110 case PACKET_NOP: 5111 case PACKET_ARB_POINT: 5112 parser->patched_cb_size += pkt_size; 5113 break; 5114 5115 default: 5116 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5117 pkt_id); 5118 rc = -EINVAL; 5119 break; 5120 } 5121 5122 if (rc) 5123 break; 5124 } 5125 5126 /* 5127 * The new CB should have space at the end for two MSG_PROT packets: 5128 * 1. Optional NOP padding for cacheline alignment 5129 * 2. 
A packet that will act as a completion packet 5130 * 3. A packet that will generate MSI interrupt 5131 */ 5132 if (parser->completion) 5133 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5134 parser->patched_cb_size); 5135 5136 return rc; 5137 } 5138 5139 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5140 struct hl_cs_parser *parser, 5141 struct packet_lin_dma *user_dma_pkt, 5142 struct packet_lin_dma *new_dma_pkt, 5143 u32 *new_dma_pkt_size) 5144 { 5145 struct hl_userptr *userptr; 5146 struct scatterlist *sg, *sg_next_iter; 5147 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5148 u64 len, len_next; 5149 dma_addr_t dma_addr, dma_addr_next; 5150 u64 device_memory_addr, addr; 5151 enum dma_data_direction dir; 5152 struct sg_table *sgt; 5153 bool src_in_host = false; 5154 bool skip_host_mem_pin = false; 5155 bool user_memset; 5156 5157 ctl = le32_to_cpu(user_dma_pkt->ctl); 5158 5159 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5160 src_in_host = true; 5161 5162 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5163 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5164 5165 if (src_in_host) { 5166 addr = le64_to_cpu(user_dma_pkt->src_addr); 5167 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5168 dir = DMA_TO_DEVICE; 5169 if (user_memset) 5170 skip_host_mem_pin = true; 5171 } else { 5172 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5173 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5174 dir = DMA_FROM_DEVICE; 5175 } 5176 5177 if ((!skip_host_mem_pin) && 5178 (!hl_userptr_is_pinned(hdev, addr, 5179 le32_to_cpu(user_dma_pkt->tsize), 5180 parser->job_userptr_list, &userptr))) { 5181 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5182 addr, user_dma_pkt->tsize); 5183 return -EFAULT; 5184 } 5185 5186 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5187 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5188 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5189 return 0; 5190 } 5191 5192 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5193 5194 sgt = userptr->sgt; 5195 dma_desc_cnt = 0; 5196 5197 for_each_sgtable_dma_sg(sgt, sg, count) { 5198 len = sg_dma_len(sg); 5199 dma_addr = sg_dma_address(sg); 5200 5201 if (len == 0) 5202 break; 5203 5204 while ((count + 1) < sgt->nents) { 5205 sg_next_iter = sg_next(sg); 5206 len_next = sg_dma_len(sg_next_iter); 5207 dma_addr_next = sg_dma_address(sg_next_iter); 5208 5209 if (len_next == 0) 5210 break; 5211 5212 if ((dma_addr + len == dma_addr_next) && 5213 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5214 len += len_next; 5215 count++; 5216 sg = sg_next_iter; 5217 } else { 5218 break; 5219 } 5220 } 5221 5222 ctl = le32_to_cpu(user_dma_pkt->ctl); 5223 if (likely(dma_desc_cnt)) 5224 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5225 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5226 new_dma_pkt->ctl = cpu_to_le32(ctl); 5227 new_dma_pkt->tsize = cpu_to_le32(len); 5228 5229 if (dir == DMA_TO_DEVICE) { 5230 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5231 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5232 } else { 5233 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5234 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5235 } 5236 5237 if (!user_memset) 5238 device_memory_addr += len; 5239 dma_desc_cnt++; 5240 new_dma_pkt++; 5241 } 5242 5243 if (!dma_desc_cnt) { 5244 dev_err(hdev->dev, 5245 "Error of 0 SG entries when patching DMA packet\n"); 5246 return -EFAULT; 5247 } 5248 5249 /* Fix the last dma packet - wrcomp must be as user set it */ 5250 new_dma_pkt--; 5251 
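	/*
	 * WR_COMP was masked out of every descriptor in the loop above, so
	 * restoring the user's setting only on the last descriptor makes
	 * write-completion fire once for the whole transfer, just as it
	 * would have for the original, unpatched packet.
	 */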
new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5252 5253 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5254 5255 return 0; 5256 } 5257 5258 static int gaudi_patch_cb(struct hl_device *hdev, 5259 struct hl_cs_parser *parser) 5260 { 5261 u32 cb_parsed_length = 0; 5262 u32 cb_patched_cur_length = 0; 5263 int rc = 0; 5264 5265 /* cb_user_size is more than 0 so loop will always be executed */ 5266 while (cb_parsed_length < parser->user_cb_size) { 5267 enum packet_id pkt_id; 5268 u16 pkt_size; 5269 u32 new_pkt_size = 0; 5270 struct gaudi_packet *user_pkt, *kernel_pkt; 5271 5272 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5273 kernel_pkt = parser->patched_cb->kernel_address + 5274 cb_patched_cur_length; 5275 5276 pkt_id = (enum packet_id) ( 5277 (le64_to_cpu(user_pkt->header) & 5278 PACKET_HEADER_PACKET_ID_MASK) >> 5279 PACKET_HEADER_PACKET_ID_SHIFT); 5280 5281 if (!validate_packet_id(pkt_id)) { 5282 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5283 rc = -EINVAL; 5284 break; 5285 } 5286 5287 pkt_size = gaudi_packet_sizes[pkt_id]; 5288 cb_parsed_length += pkt_size; 5289 if (cb_parsed_length > parser->user_cb_size) { 5290 dev_err(hdev->dev, 5291 "packet 0x%x is out of CB boundary\n", pkt_id); 5292 rc = -EINVAL; 5293 break; 5294 } 5295 5296 switch (pkt_id) { 5297 case PACKET_LIN_DMA: 5298 rc = gaudi_patch_dma_packet(hdev, parser, 5299 (struct packet_lin_dma *) user_pkt, 5300 (struct packet_lin_dma *) kernel_pkt, 5301 &new_pkt_size); 5302 cb_patched_cur_length += new_pkt_size; 5303 break; 5304 5305 case PACKET_MSG_PROT: 5306 dev_err(hdev->dev, 5307 "User not allowed to use MSG_PROT\n"); 5308 rc = -EPERM; 5309 break; 5310 5311 case PACKET_CP_DMA: 5312 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5313 rc = -EPERM; 5314 break; 5315 5316 case PACKET_STOP: 5317 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5318 rc = -EPERM; 5319 break; 5320 5321 case PACKET_WREG_32: 5322 case PACKET_WREG_BULK: 5323 case PACKET_MSG_LONG: 5324 case PACKET_MSG_SHORT: 5325 case PACKET_REPEAT: 5326 case PACKET_FENCE: 5327 case PACKET_NOP: 5328 case PACKET_ARB_POINT: 5329 case PACKET_LOAD_AND_EXE: 5330 memcpy(kernel_pkt, user_pkt, pkt_size); 5331 cb_patched_cur_length += pkt_size; 5332 break; 5333 5334 default: 5335 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5336 pkt_id); 5337 rc = -EINVAL; 5338 break; 5339 } 5340 5341 if (rc) 5342 break; 5343 } 5344 5345 return rc; 5346 } 5347 5348 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5349 struct hl_cs_parser *parser) 5350 { 5351 u64 handle; 5352 u32 patched_cb_size; 5353 struct hl_cb *user_cb; 5354 int rc; 5355 5356 /* 5357 * The new CB should have space at the end for two MSG_PROT packets: 5358 * 1. Optional NOP padding for cacheline alignment 5359 * 2. A packet that will act as a completion packet 5360 * 3. 
A packet that will generate MSI interrupt 5361 */ 5362 if (parser->completion) 5363 parser->patched_cb_size = parser->user_cb_size + 5364 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5365 else 5366 parser->patched_cb_size = parser->user_cb_size; 5367 5368 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5369 parser->patched_cb_size, false, false, 5370 &handle); 5371 5372 if (rc) { 5373 dev_err(hdev->dev, 5374 "Failed to allocate patched CB for DMA CS %d\n", 5375 rc); 5376 return rc; 5377 } 5378 5379 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5380 /* hl_cb_get should never fail */ 5381 if (!parser->patched_cb) { 5382 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5383 rc = -EFAULT; 5384 goto out; 5385 } 5386 5387 /* 5388 * We are protected from overflow because the check 5389 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5390 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5391 * 5392 * There is no option to reach here without going through that check because: 5393 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5394 * an external queue. 5395 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5396 */ 5397 memcpy(parser->patched_cb->kernel_address, 5398 parser->user_cb->kernel_address, 5399 parser->user_cb_size); 5400 5401 patched_cb_size = parser->patched_cb_size; 5402 5403 /* Validate patched CB instead of user CB */ 5404 user_cb = parser->user_cb; 5405 parser->user_cb = parser->patched_cb; 5406 rc = gaudi_validate_cb(hdev, parser, true); 5407 parser->user_cb = user_cb; 5408 5409 if (rc) { 5410 hl_cb_put(parser->patched_cb); 5411 goto out; 5412 } 5413 5414 if (patched_cb_size != parser->patched_cb_size) { 5415 dev_err(hdev->dev, "user CB size mismatch\n"); 5416 hl_cb_put(parser->patched_cb); 5417 rc = -EINVAL; 5418 goto out; 5419 } 5420 5421 out: 5422 /* 5423 * Always call cb destroy here because we still have 1 reference 5424 * to it by calling cb_get earlier. After the job will be completed, 5425 * cb_put will release it, but here we want to remove it from the 5426 * idr 5427 */ 5428 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5429 5430 return rc; 5431 } 5432 5433 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5434 struct hl_cs_parser *parser) 5435 { 5436 u64 handle; 5437 int rc; 5438 5439 rc = gaudi_validate_cb(hdev, parser, false); 5440 5441 if (rc) 5442 goto free_userptr; 5443 5444 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5445 parser->patched_cb_size, false, false, 5446 &handle); 5447 if (rc) { 5448 dev_err(hdev->dev, 5449 "Failed to allocate patched CB for DMA CS %d\n", rc); 5450 goto free_userptr; 5451 } 5452 5453 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5454 /* hl_cb_get should never fail here */ 5455 if (!parser->patched_cb) { 5456 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5457 rc = -EFAULT; 5458 goto out; 5459 } 5460 5461 rc = gaudi_patch_cb(hdev, parser); 5462 5463 if (rc) 5464 hl_cb_put(parser->patched_cb); 5465 5466 out: 5467 /* 5468 * Always call cb destroy here because we still have 1 reference 5469 * to it by calling cb_get earlier. 
After the job will be completed, 5470 * cb_put will release it, but here we want to remove it from the 5471 * idr 5472 */ 5473 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5474 5475 free_userptr: 5476 if (rc) 5477 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5478 return rc; 5479 } 5480 5481 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5482 struct hl_cs_parser *parser) 5483 { 5484 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5485 struct gaudi_device *gaudi = hdev->asic_specific; 5486 u32 nic_queue_offset, nic_mask_q_id; 5487 5488 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5489 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5490 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5491 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5492 5493 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5494 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5495 return -EINVAL; 5496 } 5497 } 5498 5499 /* For internal queue jobs just check if CB address is valid */ 5500 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5501 parser->user_cb_size, 5502 asic_prop->sram_user_base_address, 5503 asic_prop->sram_end_address)) 5504 return 0; 5505 5506 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5507 parser->user_cb_size, 5508 asic_prop->dram_user_base_address, 5509 asic_prop->dram_end_address)) 5510 return 0; 5511 5512 /* PMMU and HPMMU addresses are equal, check only one of them */ 5513 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5514 parser->user_cb_size, 5515 asic_prop->pmmu.start_addr, 5516 asic_prop->pmmu.end_addr)) 5517 return 0; 5518 5519 dev_err(hdev->dev, 5520 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5521 parser->user_cb, parser->user_cb_size); 5522 5523 return -EFAULT; 5524 } 5525 5526 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5527 { 5528 struct gaudi_device *gaudi = hdev->asic_specific; 5529 5530 if (parser->queue_type == QUEUE_TYPE_INT) 5531 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5532 5533 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5534 return gaudi_parse_cb_mmu(hdev, parser); 5535 else 5536 return gaudi_parse_cb_no_mmu(hdev, parser); 5537 } 5538 5539 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5540 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5541 u32 msi_vec, bool eb) 5542 { 5543 struct packet_msg_prot *cq_pkt; 5544 struct packet_nop *cq_padding; 5545 u64 msi_addr; 5546 u32 tmp; 5547 5548 cq_padding = kernel_address + original_len; 5549 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5550 5551 while ((void *)cq_padding < (void *)cq_pkt) { 5552 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5553 cq_padding++; 5554 } 5555 5556 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5557 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5558 5559 if (eb) 5560 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5561 5562 cq_pkt->ctl = cpu_to_le32(tmp); 5563 cq_pkt->value = cpu_to_le32(cq_val); 5564 cq_pkt->addr = cpu_to_le64(cq_addr); 5565 5566 cq_pkt++; 5567 5568 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5569 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5570 cq_pkt->ctl = cpu_to_le32(tmp); 5571 cq_pkt->value = cpu_to_le32(1); 5572 msi_addr = hdev->pdev ? 
mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4; 5573 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5574 } 5575 5576 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5577 { 5578 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5579 } 5580 5581 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5582 u32 size, u64 val) 5583 { 5584 struct packet_lin_dma *lin_dma_pkt; 5585 struct hl_cs_job *job; 5586 u32 cb_size, ctl, err_cause; 5587 struct hl_cb *cb; 5588 int rc; 5589 5590 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5591 if (!cb) 5592 return -EFAULT; 5593 5594 lin_dma_pkt = cb->kernel_address; 5595 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5596 cb_size = sizeof(*lin_dma_pkt); 5597 5598 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5599 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5600 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5601 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5602 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5603 5604 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5605 lin_dma_pkt->src_addr = cpu_to_le64(val); 5606 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5607 lin_dma_pkt->tsize = cpu_to_le32(size); 5608 5609 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5610 if (!job) { 5611 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5612 rc = -ENOMEM; 5613 goto release_cb; 5614 } 5615 5616 /* Verify DMA is OK */ 5617 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5618 if (err_cause && !hdev->init_done) { 5619 dev_dbg(hdev->dev, 5620 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5621 err_cause); 5622 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5623 } 5624 5625 job->id = 0; 5626 job->user_cb = cb; 5627 atomic_inc(&job->user_cb->cs_cnt); 5628 job->user_cb_size = cb_size; 5629 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5630 job->patched_cb = job->user_cb; 5631 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5632 5633 hl_debugfs_add_job(hdev, job); 5634 5635 rc = gaudi_send_job_on_qman0(hdev, job); 5636 hl_debugfs_remove_job(hdev, job); 5637 kfree(job); 5638 atomic_dec(&cb->cs_cnt); 5639 5640 /* Verify DMA is OK */ 5641 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5642 if (err_cause) { 5643 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5644 rc = -EIO; 5645 if (!hdev->init_done) { 5646 dev_dbg(hdev->dev, 5647 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5648 err_cause); 5649 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5650 } 5651 } 5652 5653 release_cb: 5654 hl_cb_put(cb); 5655 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5656 5657 return rc; 5658 } 5659 5660 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5661 u32 num_regs, u32 val) 5662 { 5663 struct packet_msg_long *pkt; 5664 struct hl_cs_job *job; 5665 u32 cb_size, ctl; 5666 struct hl_cb *cb; 5667 int i, rc; 5668 5669 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5670 5671 if (cb_size > SZ_2M) { 5672 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5673 return -ENOMEM; 5674 } 5675 5676 cb = hl_cb_kernel_create(hdev, cb_size, false); 5677 if (!cb) 5678 return -EFAULT; 5679 5680 pkt = cb->kernel_address; 5681 5682 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5683 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5684 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5685 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5686 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5687 5688 for (i = 0; i < num_regs ; i++, pkt++) { 
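/*
 * One MSG_LONG packet per register: iteration i writes 'val' to
 * reg_base + (i * 4), so a single CB fills num_regs consecutive 32-bit
 * registers with the same value (the SM restore path below uses this to
 * zero entire SOB and monitor blocks).
 */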
5689 pkt->ctl = cpu_to_le32(ctl); 5690 pkt->value = cpu_to_le32(val); 5691 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5692 } 5693 5694 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5695 if (!job) { 5696 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5697 rc = -ENOMEM; 5698 goto release_cb; 5699 } 5700 5701 job->id = 0; 5702 job->user_cb = cb; 5703 atomic_inc(&job->user_cb->cs_cnt); 5704 job->user_cb_size = cb_size; 5705 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5706 job->patched_cb = job->user_cb; 5707 job->job_cb_size = cb_size; 5708 5709 hl_debugfs_add_job(hdev, job); 5710 5711 rc = gaudi_send_job_on_qman0(hdev, job); 5712 hl_debugfs_remove_job(hdev, job); 5713 kfree(job); 5714 atomic_dec(&cb->cs_cnt); 5715 5716 release_cb: 5717 hl_cb_put(cb); 5718 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5719 5720 return rc; 5721 } 5722 5723 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5724 { 5725 u64 base_addr; 5726 u32 num_regs; 5727 int rc; 5728 5729 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5730 num_regs = NUM_OF_SOB_IN_BLOCK; 5731 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5732 if (rc) { 5733 dev_err(hdev->dev, "failed resetting SM registers"); 5734 return -ENOMEM; 5735 } 5736 5737 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5738 num_regs = NUM_OF_SOB_IN_BLOCK; 5739 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5740 if (rc) { 5741 dev_err(hdev->dev, "failed resetting SM registers"); 5742 return -ENOMEM; 5743 } 5744 5745 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5746 num_regs = NUM_OF_SOB_IN_BLOCK; 5747 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5748 if (rc) { 5749 dev_err(hdev->dev, "failed resetting SM registers"); 5750 return -ENOMEM; 5751 } 5752 5753 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5754 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5755 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5756 if (rc) { 5757 dev_err(hdev->dev, "failed resetting SM registers"); 5758 return -ENOMEM; 5759 } 5760 5761 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5762 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5763 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5764 if (rc) { 5765 dev_err(hdev->dev, "failed resetting SM registers"); 5766 return -ENOMEM; 5767 } 5768 5769 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5770 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5771 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5772 if (rc) { 5773 dev_err(hdev->dev, "failed resetting SM registers"); 5774 return -ENOMEM; 5775 } 5776 5777 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5778 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5779 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5780 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5781 if (rc) { 5782 dev_err(hdev->dev, "failed resetting SM registers"); 5783 return -ENOMEM; 5784 } 5785 5786 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5787 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5788 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5789 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5790 if (rc) { 5791 dev_err(hdev->dev, "failed resetting SM registers"); 5792 return -ENOMEM; 5793 } 5794 5795 return 0; 5796 } 5797 5798 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5799 { 5800 u32 sob_delta = 
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5801 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5802 int i; 5803 5804 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5805 u64 sob_addr = CFG_BASE + 5806 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5807 (i * sob_delta); 5808 u32 dma_offset = i * DMA_CORE_OFFSET; 5809 5810 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5811 lower_32_bits(sob_addr)); 5812 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5813 upper_32_bits(sob_addr)); 5814 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5815 5816 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5817 * modified by the user for SRAM reduction 5818 */ 5819 if (i > 1) 5820 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5821 0x00000001); 5822 } 5823 } 5824 5825 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5826 { 5827 u32 qman_offset; 5828 int i; 5829 5830 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5831 qman_offset = i * DMA_QMAN_OFFSET; 5832 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5833 } 5834 5835 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5836 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5837 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5838 } 5839 5840 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5841 qman_offset = i * TPC_QMAN_OFFSET; 5842 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5843 } 5844 5845 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5846 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5847 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5848 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5849 } 5850 } 5851 5852 static int gaudi_restore_user_registers(struct hl_device *hdev) 5853 { 5854 int rc; 5855 5856 rc = gaudi_restore_sm_registers(hdev); 5857 if (rc) 5858 return rc; 5859 5860 gaudi_restore_dma_registers(hdev); 5861 gaudi_restore_qm_registers(hdev); 5862 5863 return 0; 5864 } 5865 5866 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5867 { 5868 return 0; 5869 } 5870 5871 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5872 { 5873 u32 size = hdev->asic_prop.mmu_pgt_size + 5874 hdev->asic_prop.mmu_cache_mng_size; 5875 struct gaudi_device *gaudi = hdev->asic_specific; 5876 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5877 5878 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5879 return 0; 5880 5881 return gaudi_memset_device_memory(hdev, addr, size, 0); 5882 } 5883 5884 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5885 { 5886 5887 } 5888 5889 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5890 u32 size_to_dma, dma_addr_t dma_addr) 5891 { 5892 u32 err_cause, val; 5893 u64 dma_offset; 5894 int rc; 5895 5896 dma_offset = dma_id * DMA_CORE_OFFSET; 5897 5898 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5899 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5900 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5901 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5902 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5903 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5904 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5905 5906 rc = hl_poll_timeout( 5907 hdev, 5908 mmDMA0_CORE_STS0 + dma_offset, 5909 val, 5910 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5911 0, 5912 1000000); 5913 5914 if (rc) { 5915 dev_err(hdev->dev, 5916 "DMA %d timed-out during reading of 0x%llx\n", 5917 dma_id, addr); 5918 return -EIO; 5919 } 5920 5921 /* Verify DMA is OK */ 5922 err_cause = 
RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5923 if (err_cause) { 5924 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5925 dev_dbg(hdev->dev, 5926 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5927 err_cause); 5928 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5929 5930 return -EIO; 5931 } 5932 5933 return 0; 5934 } 5935 5936 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5937 void *blob_addr) 5938 { 5939 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 5940 u32 qm_glbl_sts0, qm_cgm_sts; 5941 u64 dma_offset, qm_offset; 5942 dma_addr_t dma_addr; 5943 void *kernel_addr; 5944 bool is_eng_idle; 5945 int rc = 0, dma_id; 5946 5947 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 5948 5949 if (!kernel_addr) 5950 return -ENOMEM; 5951 5952 hdev->asic_funcs->hw_queues_lock(hdev); 5953 5954 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 5955 dma_offset = dma_id * DMA_CORE_OFFSET; 5956 qm_offset = dma_id * DMA_QMAN_OFFSET; 5957 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5958 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5959 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5960 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5961 IS_DMA_IDLE(dma_core_sts0); 5962 5963 if (!is_eng_idle) { 5964 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 5965 dma_offset = dma_id * DMA_CORE_OFFSET; 5966 qm_offset = dma_id * DMA_QMAN_OFFSET; 5967 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5968 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5969 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5970 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5971 IS_DMA_IDLE(dma_core_sts0); 5972 5973 if (!is_eng_idle) { 5974 dev_err_ratelimited(hdev->dev, 5975 "Can't read via DMA because it is BUSY\n"); 5976 rc = -EAGAIN; 5977 goto out; 5978 } 5979 } 5980 5981 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 5982 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 5983 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 5984 5985 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 5986 * using the compute ctx ASID, if exists. If not, use the kernel ctx 5987 * ASID 5988 */ 5989 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 5990 5991 /* Verify DMA is OK */ 5992 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5993 if (err_cause) { 5994 dev_dbg(hdev->dev, 5995 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5996 err_cause); 5997 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5998 } 5999 6000 pos = 0; 6001 size_left = size; 6002 size_to_dma = SZ_2M; 6003 6004 while (size_left > 0) { 6005 6006 if (size_left < SZ_2M) 6007 size_to_dma = size_left; 6008 6009 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 6010 dma_addr); 6011 if (rc) 6012 break; 6013 6014 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 6015 6016 if (size_left <= SZ_2M) 6017 break; 6018 6019 pos += SZ_2M; 6020 addr += SZ_2M; 6021 size_left -= SZ_2M; 6022 } 6023 6024 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6025 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6026 * ASID 6027 */ 6028 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6029 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6030 6031 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6032 6033 out: 6034 hdev->asic_funcs->hw_queues_unlock(hdev); 6035 6036 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6037 6038 return rc; 6039 } 6040 6041 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6042 { 6043 struct gaudi_device *gaudi = hdev->asic_specific; 6044 6045 if (hdev->reset_info.hard_reset_pending) 6046 return U64_MAX; 6047 6048 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6049 (addr - gaudi->hbm_bar_cur_addr)); 6050 } 6051 6052 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6053 { 6054 struct gaudi_device *gaudi = hdev->asic_specific; 6055 6056 if (hdev->reset_info.hard_reset_pending) 6057 return; 6058 6059 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6060 (addr - gaudi->hbm_bar_cur_addr)); 6061 } 6062 6063 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6064 { 6065 /* mask to zero the MMBP and ASID bits */ 6066 WREG32_AND(reg, ~0x7FF); 6067 WREG32_OR(reg, asid); 6068 } 6069 6070 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6071 { 6072 struct gaudi_device *gaudi = hdev->asic_specific; 6073 6074 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6075 return; 6076 6077 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6078 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6079 return; 6080 } 6081 6082 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6083 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6084 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6085 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6086 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6087 6088 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6089 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6090 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6091 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6092 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6093 6094 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6095 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6096 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6097 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6098 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6099 6100 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6101 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6102 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6103 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6104 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6105 6106 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6107 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6108 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6109 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6110 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6111 6112 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6113 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6114 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6115 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6116 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6117 6118 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6119 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6120 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6121 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6122 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6123 6124 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6125 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6126 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6127 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6128 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6129 6130 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6131 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6132 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6133 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6134 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6135 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6136 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6137 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6138 6139 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6140 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6141 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6142 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6143 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6144 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6145 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6146 6147 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6148 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6149 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6150 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6151 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6152 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6153 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6154 6155 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6156 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6158 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6159 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6160 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6161 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6162 6163 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6167 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6168 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6169 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6170 6171 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6173 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6174 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6175 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6176 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6177 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6178 6179 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6182 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6183 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6184 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6185 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6186 6187 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6191 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6192 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6194 6195 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6197 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6198 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6200 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6202 6203 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6206 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6207 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6208 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6209 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6210 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6211 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6212 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6213 6214 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6215 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6216 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6217 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6218 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6219 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6220 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6221 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6222 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6223 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6224 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6225 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6226 6227 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6228 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6229 asid); 6230 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6231 asid); 6232 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6233 asid); 6234 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6235 asid); 6236 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6237 asid); 6238 } 6239 6240 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6241 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6242 asid); 6243 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6244 asid); 6245 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6246 asid); 6247 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6248 asid); 6249 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6250 asid); 6251 } 6252 6253 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6254 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6255 asid); 6256 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6257 asid); 6258 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6259 asid); 6260 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6261 asid); 6262 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6263 asid); 6264 } 6265 6266 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6267 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6268 asid); 6269 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6270 asid); 6271 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6272 asid); 6273 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6274 asid); 6275 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6276 asid); 6277 } 6278 6279 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6280 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6281 asid); 6282 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6283 asid); 6284 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6285 asid); 6286 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6287 asid); 6288 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6289 asid); 6290 } 6291 6292 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6293 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6294 asid); 6295 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6296 asid); 6297 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6298 asid); 6299 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6300 asid); 6301 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6302 asid); 6303 } 6304 6305 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6306 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6307 asid); 6308 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6309 asid); 6310 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6311 asid); 6312 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6313 asid); 6314 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6315 asid); 6316 } 6317 6318 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6319 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6320 asid); 6321 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6322 asid); 6323 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6324 asid); 6325 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6326 asid); 6327 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6328 asid); 6329 } 6330 6331 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6332 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6333 asid); 6334 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6335 asid); 6336 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6337 asid); 6338 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6339 asid); 6340 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6341 asid); 6342 } 6343 6344 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6345 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6346 asid); 6347 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6348 asid); 6349 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6350 asid); 6351 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6352 asid); 6353 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6354 asid); 6355 } 6356 6357 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6358 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6359 } 6360 6361 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6362 struct hl_cs_job *job) 6363 { 6364 struct packet_msg_prot *fence_pkt; 6365 u32 *fence_ptr; 6366 dma_addr_t fence_dma_addr; 6367 struct hl_cb *cb; 6368 u32 tmp, timeout, dma_offset; 6369 int rc; 6370 6371 if (hdev->pldm) 6372 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6373 else 6374 timeout = HL_DEVICE_TIMEOUT_USEC; 6375 6376 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6377 if (!fence_ptr) { 6378 dev_err(hdev->dev, 6379 "Failed to allocate fence memory for QMAN0\n"); 6380 return -ENOMEM; 6381 } 6382 6383 cb = job->patched_cb; 6384 6385 fence_pkt = cb->kernel_address + 6386 job->job_cb_size - sizeof(struct packet_msg_prot); 6387 6388 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6389 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6390 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6391 6392 fence_pkt->ctl = cpu_to_le32(tmp); 6393 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6394 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6395 6396 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6397 6398 WREG32(mmDMA0_CORE_PROT + dma_offset, 6399 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6400 6401 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6402 job->job_cb_size, cb->bus_address); 6403 if (rc) { 6404 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6405 goto free_fence_ptr; 6406 } 6407 6408 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6409 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6410 timeout, true); 6411 6412 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6413 6414 if (rc == -ETIMEDOUT) { 6415 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6416 goto free_fence_ptr; 6417 } 6418 6419 free_fence_ptr: 6420 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6421 6422 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6423 return rc; 6424 } 6425 6426 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6427 { 6428 if (event_type >= GAUDI_EVENT_SIZE) 6429 goto event_not_supported; 6430 6431 if (!gaudi_irq_map_table[event_type].valid) 6432 goto 
event_not_supported; 6433 6434 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6435 6436 return; 6437 6438 event_not_supported: 6439 snprintf(desc, size, "N/A"); 6440 } 6441 6442 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6443 bool is_write, u16 *engine_id_1, 6444 u16 *engine_id_2) 6445 { 6446 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6447 6448 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6449 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6450 6451 switch (x_y) { 6452 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6453 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6454 dma_id[0] = 0; 6455 dma_id[1] = 2; 6456 break; 6457 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6458 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6459 dma_id[0] = 1; 6460 dma_id[1] = 3; 6461 break; 6462 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6463 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6464 dma_id[0] = 4; 6465 dma_id[1] = 6; 6466 break; 6467 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6468 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6469 dma_id[0] = 5; 6470 dma_id[1] = 7; 6471 break; 6472 default: 6473 goto unknown_initiator; 6474 } 6475 6476 for (i = 0 ; i < 2 ; i++) { 6477 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6478 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6479 } 6480 6481 switch (x_y) { 6482 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6483 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6484 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6485 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6486 return "DMA0"; 6487 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6488 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6489 return "DMA2"; 6490 } else { 6491 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6492 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6493 return "DMA0 or DMA2"; 6494 } 6495 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6496 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6497 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6498 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6499 return "DMA1"; 6500 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6501 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6502 return "DMA3"; 6503 } else { 6504 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6505 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6506 return "DMA1 or DMA3"; 6507 } 6508 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6509 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6510 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6511 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6512 return "DMA4"; 6513 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6514 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6515 return "DMA6"; 6516 } else { 6517 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6518 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6519 return "DMA4 or DMA6"; 6520 } 6521 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6522 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6523 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6524 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6525 return "DMA5"; 6526 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6527 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6528 return "DMA7"; 6529 } else { 6530 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6531 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6532 return "DMA5 or DMA7"; 6533 } 6534 } 6535 6536 unknown_initiator: 6537 return "unknown initiator"; 6538 } 6539 6540 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6541 u16 *engine_id_1, u16 *engine_id_2) 6542 { 6543 u32 val, x_y, axi_id; 6544 6545 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6546 RREG32(mmMMU_UP_RAZWI_READ_ID); 6547 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6548 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6549 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6550 RAZWI_INITIATOR_AXI_ID_SHIFT); 6551 6552 switch (x_y) { 6553 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6554 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6555 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6556 return "TPC0"; 6557 } 6558 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6559 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6560 return "NIC0"; 6561 } 6562 break; 6563 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6564 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6565 return "TPC1"; 6566 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6567 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6568 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6569 return "MME0"; 6570 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6571 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6572 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6573 return "MME1"; 6574 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6575 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6576 return "TPC2"; 6577 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6578 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6579 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6580 return "TPC3"; 6581 } 6582 /* PCI, CPU or PSOC does not have engine id*/ 6583 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6584 return "PCI"; 6585 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6586 return "CPU"; 6587 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6588 return "PSOC"; 6589 break; 6590 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6591 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6592 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6593 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6594 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6595 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6596 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6597 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6598 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6599 engine_id_1, engine_id_2); 6600 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6601 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6602 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6603 return "TPC4"; 6604 } 6605 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6606 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6607 return "NIC1"; 6608 } 6609 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6610 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6611 return "NIC2"; 6612 } 6613 break; 6614 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6615 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6616 return "TPC5"; 6617 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6618 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6619 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6620 return "MME2"; 6621 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6622 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6623 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6624 return "MME3"; 6625 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6626 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6627 return "TPC6"; 6628 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6629 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6630 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6631 return "TPC7"; 6632 } 6633 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6634 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6635 return "NIC4"; 6636 } 6637 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6638 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6639 return "NIC5"; 6640 } 6641 break; 6642 default: 6643 break; 6644 } 6645 6646 dev_err(hdev->dev, 6647 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6648 val, 6649 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6650 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6651 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6652 RAZWI_INITIATOR_AXI_ID_MASK); 6653 6654 return "unknown initiator"; 6655 } 6656 6657 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6658 u16 *engine_id_2, bool *is_read, bool *is_write) 6659 { 6660 6661 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6662 dev_err_ratelimited(hdev->dev, 6663 "RAZWI event caused by illegal write of %s\n", 6664 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6665 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6666 *is_write = true; 6667 } 6668 6669 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6670 dev_err_ratelimited(hdev->dev, 6671 "RAZWI event caused by illegal read of %s\n", 6672 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6673 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6674 *is_read = true; 6675 } 6676 } 6677 6678 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6679 { 6680 struct gaudi_device *gaudi = hdev->asic_specific; 6681 u32 val; 6682 6683 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6684 return; 6685 6686 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6687 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6688 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6689 *addr <<= 32; 6690 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6691 6692 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6693 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6694 6695 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6696 } 6697 6698 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6699 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6700 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6701 *addr <<= 32; 6702 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6703 6704 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6705 6706 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6707 } 6708 } 6709 6710 /* 6711 * +-------------------+------------------------------------------------------+ 6712 * | Configuration Reg | Description | 6713 * | Address | | 6714 * +-------------------+------------------------------------------------------+ 6715 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6716 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6717 * | |0xF34 memory wrappers 63:32 | 6718 * | |0xF38 memory wrappers 95:64 | 6719 * | |0xF3C memory wrappers 127:96 | 6720 * +-------------------+------------------------------------------------------+ 6721 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6722 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6723 * | |0xF44 memory wrappers 63:32 | 6724 * | |0xF48 memory wrappers 95:64 | 6725 * | |0xF4C memory wrappers 127:96 | 6726 * +-------------------+------------------------------------------------------+ 6727 */ 6728 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6729 struct ecc_info_extract_params *params, u64 *ecc_address, 6730 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6731 { 6732 u32 i, num_mem_regs, reg, err_bit; 6733 u64 err_addr, err_word = 0; 6734 6735 num_mem_regs = params->num_memories / 32 + 6736 ((params->num_memories % 32) ? 
1 : 0); 6737 6738 if (params->block_address >= CFG_BASE) 6739 params->block_address -= CFG_BASE; 6740 6741 if (params->derr) 6742 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6743 else 6744 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6745 6746 /* Set invalid wrapper index */ 6747 *memory_wrapper_idx = 0xFF; 6748 6749 /* Iterate through memory wrappers, a single bit must be set */ 6750 for (i = 0 ; i < num_mem_regs ; i++) { 6751 /* The indication registers are consecutive, 4 bytes apart */ 6752 err_word = RREG32(err_addr + (i * 4)); 6753 if (err_word) { 6754 err_bit = __ffs(err_word); 6755 *memory_wrapper_idx = err_bit + (32 * i); 6756 break; 6757 } 6758 } 6759 6760 if (*memory_wrapper_idx == 0xFF) { 6761 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6762 return -EINVAL; 6763 } 6764 6765 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6766 *memory_wrapper_idx); 6767 6768 *ecc_address = 6769 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6770 *ecc_syndrom = 6771 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6772 6773 /* Clear error indication */ 6774 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6775 if (params->derr) 6776 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6777 else 6778 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6779 6780 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6781 6782 return 0; 6783 } 6784 6785 /* 6786 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6787 * 6788 * @idx: the current pi/ci value 6789 * @q_len: the queue length (power of 2) 6790 * 6791 * @return the cyclically decremented index 6792 */ 6793 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6794 { 6795 u32 mask = q_len - 1; 6796 6797 /* 6798 * modular decrement is equivalent to adding (q_len - 1); 6799 * later we take the LSBs to make sure the value is in the 6800 * range [0, q_len - 1] 6801 */ 6802 return (idx + q_len - 1) & mask; 6803 } 6804 6805 /** 6806 * gaudi_handle_sw_config_stream_data - print SW config stream data 6807 * 6808 * @hdev: pointer to the habanalabs device structure 6809 * @stream: the QMAN's stream 6810 * @qman_base: base address of QMAN registers block 6811 * @event_mask: mask of the last events occurred 6812 */ 6813 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6814 u64 qman_base, u64 event_mask) 6815 { 6816 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6817 u32 cq_ptr_lo_off, size; 6818 6819 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6820 6821 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6822 stream * cq_ptr_lo_off; 6823 cq_ptr_hi = cq_ptr_lo + 6824 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6825 cq_tsize = cq_ptr_lo + 6826 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6827 6828 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6829 size = RREG32(cq_tsize); 6830 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6831 stream, cq_ptr, size); 6832 6833 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6834 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6835 hdev->captured_err_info.undef_opcode.cq_size = size; 6836 hdev->captured_err_info.undef_opcode.stream_id = stream; 6837 } 6838 } 6839 6840 /** 6841 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6842 * 6843 * @hdev: pointer to the habanalabs device structure 6844 * @qid_base: first QID of the QMAN (out of 4 streams) 6845 * @stream: the QMAN's stream 6846 * @qman_base:
base address of QMAN registers block 6847 * @event_mask: mask of the last events occurred 6848 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6849 */ 6850 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6851 u32 stream, u64 qman_base, 6852 u64 event_mask, 6853 bool pr_sw_conf) 6854 { 6855 u32 ci, qm_ci_stream_off, queue_len; 6856 struct hl_hw_queue *q; 6857 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6858 int i; 6859 6860 q = &hdev->kernel_queues[qid_base + stream]; 6861 6862 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6863 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6864 stream * qm_ci_stream_off; 6865 6866 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6867 q->int_queue_len : HL_QUEUE_LENGTH; 6868 6869 hdev->asic_funcs->hw_queues_lock(hdev); 6870 6871 if (pr_sw_conf) 6872 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6873 6874 ci = RREG32(pq_ci); 6875 6876 /* we should start printing from ci - 1 */ 6877 ci = gaudi_queue_idx_dec(ci, queue_len); 6878 memset(addr, 0, sizeof(addr)); 6879 6880 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6881 struct hl_bd *bd; 6882 u32 len; 6883 6884 bd = q->kernel_address; 6885 bd += ci; 6886 6887 len = le32_to_cpu(bd->len); 6888 /* len 0 means an uninitialized entry - break */ 6889 if (!len) 6890 break; 6891 6892 addr[i] = le64_to_cpu(bd->ptr); 6893 6894 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6895 stream, ci, addr[i], len); 6896 6897 /* get previous ci, wrap if needed */ 6898 ci = gaudi_queue_idx_dec(ci, queue_len); 6899 } 6900 6901 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6902 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6903 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6904 6905 if (arr_idx == 0) { 6906 undef_opcode->timestamp = ktime_get(); 6907 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6908 } 6909 6910 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6911 undef_opcode->cb_addr_streams_len++; 6912 } 6913 6914 hdev->asic_funcs->hw_queues_unlock(hdev); 6915 } 6916 6917 /** 6918 * handle_qman_data_on_err - extract QMAN data on error 6919 * 6920 * @hdev: pointer to the habanalabs device structure 6921 * @qid_base: first QID of the QMAN (out of 4 streams) 6922 * @stream: the QMAN's stream 6923 * @qman_base: base address of QMAN registers block 6924 * @event_mask: mask of the last events occurred 6925 * 6926 * This function attempts to extract as much data as possible on a QMAN error. 6927 * On upper CP print the SW config stream data and last 8 PQEs.
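 * The lower CP is selected by passing @stream == QMAN_STREAMS.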
6928 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6929 */ 6930 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6931 u32 stream, u64 qman_base, u64 event_mask) 6932 { 6933 u32 i; 6934 6935 if (stream != QMAN_STREAMS) { 6936 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 6937 qman_base, event_mask, true); 6938 return; 6939 } 6940 6941 /* handle Lower-CP */ 6942 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6943 6944 for (i = 0; i < QMAN_STREAMS; i++) 6945 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 6946 qman_base, event_mask, false); 6947 } 6948 6949 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 6950 const char *qm_name, 6951 u64 qman_base, 6952 u32 qid_base, 6953 u64 *event_mask) 6954 { 6955 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 6956 u64 glbl_sts_addr, arb_err_addr; 6957 char reg_desc[32]; 6958 6959 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 6960 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 6961 6962 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 6963 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 6964 glbl_sts_clr_val = 0; 6965 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 6966 6967 if (!glbl_sts_val) 6968 continue; 6969 6970 if (i == QMAN_STREAMS) 6971 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 6972 else 6973 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 6974 6975 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 6976 if (glbl_sts_val & BIT(j)) { 6977 dev_err_ratelimited(hdev->dev, 6978 "%s %s. err cause: %s\n", 6979 qm_name, reg_desc, 6980 gaudi_qman_error_cause[j]); 6981 glbl_sts_clr_val |= BIT(j); 6982 } 6983 } 6984 /* check for undefined opcode */ 6985 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 6986 hdev->captured_err_info.undef_opcode.write_enable) { 6987 memset(&hdev->captured_err_info.undef_opcode, 0, 6988 sizeof(hdev->captured_err_info.undef_opcode)); 6989 6990 hdev->captured_err_info.undef_opcode.write_enable = false; 6991 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 6992 } 6993 6994 /* Write 1 clear errors */ 6995 if (!hdev->stop_on_err) 6996 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 6997 else 6998 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 6999 } 7000 7001 arb_err_val = RREG32(arb_err_addr); 7002 7003 if (!arb_err_val) 7004 return; 7005 7006 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 7007 if (arb_err_val & BIT(j)) { 7008 dev_err_ratelimited(hdev->dev, 7009 "%s ARB_ERR. 
err cause: %s\n", 7010 qm_name, 7011 gaudi_qman_arb_error_cause[j]); 7012 } 7013 } 7014 } 7015 7016 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 7017 struct hl_eq_sm_sei_data *sei_data) 7018 { 7019 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7020 7021 /* Flip the bits as the enum is ordered in the opposite way */ 7022 index = (index ^ 0x3) & 0x3; 7023 7024 switch (sei_data->sei_cause) { 7025 case SM_SEI_SO_OVERFLOW: 7026 dev_err_ratelimited(hdev->dev, 7027 "%s SEI Error: SOB Group %u overflow/underflow", 7028 gaudi_sync_manager_names[index], 7029 le32_to_cpu(sei_data->sei_log)); 7030 break; 7031 case SM_SEI_LBW_4B_UNALIGNED: 7032 dev_err_ratelimited(hdev->dev, 7033 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7034 gaudi_sync_manager_names[index], 7035 le32_to_cpu(sei_data->sei_log)); 7036 break; 7037 case SM_SEI_AXI_RESPONSE_ERR: 7038 dev_err_ratelimited(hdev->dev, 7039 "%s SEI Error: AXI ID %u response error", 7040 gaudi_sync_manager_names[index], 7041 le32_to_cpu(sei_data->sei_log)); 7042 break; 7043 default: 7044 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7045 le32_to_cpu(sei_data->sei_log)); 7046 break; 7047 } 7048 } 7049 7050 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7051 struct hl_eq_ecc_data *ecc_data) 7052 { 7053 struct ecc_info_extract_params params; 7054 u64 ecc_address = 0, ecc_syndrom = 0; 7055 u8 index, memory_wrapper_idx = 0; 7056 bool extract_info_from_fw; 7057 int rc; 7058 7059 if (hdev->asic_prop.fw_security_enabled) { 7060 extract_info_from_fw = true; 7061 goto extract_ecc_info; 7062 } 7063 7064 switch (event_type) { 7065 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7066 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7067 extract_info_from_fw = true; 7068 break; 7069 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7070 index = event_type - GAUDI_EVENT_TPC0_SERR; 7071 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7072 params.num_memories = 90; 7073 params.derr = false; 7074 extract_info_from_fw = false; 7075 break; 7076 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7077 index = event_type - GAUDI_EVENT_TPC0_DERR; 7078 params.block_address = 7079 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7080 params.num_memories = 90; 7081 params.derr = true; 7082 extract_info_from_fw = false; 7083 break; 7084 case GAUDI_EVENT_MME0_ACC_SERR: 7085 case GAUDI_EVENT_MME1_ACC_SERR: 7086 case GAUDI_EVENT_MME2_ACC_SERR: 7087 case GAUDI_EVENT_MME3_ACC_SERR: 7088 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7089 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7090 params.num_memories = 128; 7091 params.derr = false; 7092 extract_info_from_fw = false; 7093 break; 7094 case GAUDI_EVENT_MME0_ACC_DERR: 7095 case GAUDI_EVENT_MME1_ACC_DERR: 7096 case GAUDI_EVENT_MME2_ACC_DERR: 7097 case GAUDI_EVENT_MME3_ACC_DERR: 7098 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7099 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7100 params.num_memories = 128; 7101 params.derr = true; 7102 extract_info_from_fw = false; 7103 break; 7104 case GAUDI_EVENT_MME0_SBAB_SERR: 7105 case GAUDI_EVENT_MME1_SBAB_SERR: 7106 case GAUDI_EVENT_MME2_SBAB_SERR: 7107 case GAUDI_EVENT_MME3_SBAB_SERR: 7108 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7109 params.block_address = 7110 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7111 params.num_memories = 33; 7112 params.derr = false; 7113 extract_info_from_fw = false; 7114 break; 7115 case GAUDI_EVENT_MME0_SBAB_DERR: 7116 case GAUDI_EVENT_MME1_SBAB_DERR: 7117 case GAUDI_EVENT_MME2_SBAB_DERR: 7118 case GAUDI_EVENT_MME3_SBAB_DERR: 7119 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7120 params.block_address = 7121 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7122 params.num_memories = 33; 7123 params.derr = true; 7124 extract_info_from_fw = false; 7125 break; 7126 default: 7127 return; 7128 } 7129 7130 extract_ecc_info: 7131 if (extract_info_from_fw) { 7132 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7133 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7134 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7135 } else { 7136 rc = gaudi_extract_ecc_info(hdev, &params, &ecc_address, 7137 &ecc_syndrom, &memory_wrapper_idx); 7138 if (rc) 7139 return; 7140 } 7141 7142 dev_err(hdev->dev, 7143 "ECC error detected. address: %#llx. Syndrome: %#llx. block id %u\n", 7144 ecc_address, ecc_syndrom, memory_wrapper_idx); 7145 } 7146 7147 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7148 { 7149 u64 qman_base; 7150 char desc[32]; 7151 u32 qid_base; 7152 u8 index; 7153 7154 switch (event_type) { 7155 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7156 index = event_type - GAUDI_EVENT_TPC0_QM; 7157 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7158 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7159 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7160 break; 7161 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7162 if (event_type == GAUDI_EVENT_MME0_QM) { 7163 index = 0; 7164 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7165 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7166 index = 2; 7167 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7168 } 7169 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7170 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7171 break; 7172 case GAUDI_EVENT_DMA0_QM ...
GAUDI_EVENT_DMA7_QM: 7173 index = event_type - GAUDI_EVENT_DMA0_QM; 7174 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7175 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7176 if (index > 1) 7177 qid_base++; 7178 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7179 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7180 break; 7181 case GAUDI_EVENT_NIC0_QM0: 7182 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7183 qman_base = mmNIC0_QM0_BASE; 7184 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7185 break; 7186 case GAUDI_EVENT_NIC0_QM1: 7187 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7188 qman_base = mmNIC0_QM1_BASE; 7189 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7190 break; 7191 case GAUDI_EVENT_NIC1_QM0: 7192 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7193 qman_base = mmNIC1_QM0_BASE; 7194 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7195 break; 7196 case GAUDI_EVENT_NIC1_QM1: 7197 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7198 qman_base = mmNIC1_QM1_BASE; 7199 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7200 break; 7201 case GAUDI_EVENT_NIC2_QM0: 7202 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7203 qman_base = mmNIC2_QM0_BASE; 7204 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7205 break; 7206 case GAUDI_EVENT_NIC2_QM1: 7207 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7208 qman_base = mmNIC2_QM1_BASE; 7209 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7210 break; 7211 case GAUDI_EVENT_NIC3_QM0: 7212 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7213 qman_base = mmNIC3_QM0_BASE; 7214 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7215 break; 7216 case GAUDI_EVENT_NIC3_QM1: 7217 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7218 qman_base = mmNIC3_QM1_BASE; 7219 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7220 break; 7221 case GAUDI_EVENT_NIC4_QM0: 7222 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7223 qman_base = mmNIC4_QM0_BASE; 7224 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7225 break; 7226 case GAUDI_EVENT_NIC4_QM1: 7227 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7228 qman_base = mmNIC4_QM1_BASE; 7229 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7230 break; 7231 default: 7232 return; 7233 } 7234 7235 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7236 } 7237 7238 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7239 bool check_razwi, u64 *event_mask) 7240 { 7241 bool is_read = false, is_write = false; 7242 u16 engine_id[2], num_of_razwi_eng = 0; 7243 char desc[64] = ""; 7244 u64 razwi_addr = 0; 7245 u8 razwi_flags = 0; 7246 7247 /* 7248 * Init engine id by default as not valid and only if razwi initiated from engine with 7249 * engine id it will get valid value. 
7250 */ 7251 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7252 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7253 7254 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7255 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7256 event_type, desc); 7257 7258 if (check_razwi) { 7259 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7260 &is_write); 7261 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7262 7263 if (is_read) 7264 razwi_flags |= HL_RAZWI_READ; 7265 if (is_write) 7266 razwi_flags |= HL_RAZWI_WRITE; 7267 7268 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7269 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7270 num_of_razwi_eng = 2; 7271 else 7272 num_of_razwi_eng = 1; 7273 } 7274 7275 if (razwi_flags) 7276 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7277 razwi_flags, event_mask); 7278 } 7279 } 7280 7281 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7282 struct cpucp_pkt_sync_err *sync_err) 7283 { 7284 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7285 7286 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7287 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7288 } 7289 7290 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7291 struct hl_eq_fw_alive *fw_alive) 7292 { 7293 dev_err(hdev->dev, 7294 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7295 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7296 le32_to_cpu(fw_alive->process_id), 7297 le32_to_cpu(fw_alive->thread_id), 7298 le64_to_cpu(fw_alive->uptime_seconds)); 7299 } 7300 7301 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7302 void *data) 7303 { 7304 char desc[64] = "", *type; 7305 struct eq_nic_sei_event *eq_nic_sei = data; 7306 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7307 7308 switch (eq_nic_sei->axi_error_cause) { 7309 case RXB: 7310 type = "RXB"; 7311 break; 7312 case RXE: 7313 type = "RXE"; 7314 break; 7315 case TXS: 7316 type = "TXS"; 7317 break; 7318 case TXE: 7319 type = "TXE"; 7320 break; 7321 case QPC_RESP: 7322 type = "QPC_RESP"; 7323 break; 7324 case NON_AXI_ERR: 7325 type = "NON_AXI_ERR"; 7326 break; 7327 case TMR: 7328 type = "TMR"; 7329 break; 7330 default: 7331 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7332 eq_nic_sei->axi_error_cause); 7333 type = "N/A"; 7334 break; 7335 } 7336 7337 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7338 eq_nic_sei->id); 7339 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7340 event_type, desc); 7341 } 7342 7343 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7344 { 7345 /* GAUDI doesn't support any reset except hard-reset */ 7346 return -EPERM; 7347 } 7348 7349 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7350 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7351 { 7352 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7353 int rc = 0; 7354 7355 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7356 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7357 if (!hbm_ecc_data) { 7358 dev_err(hdev->dev, "No FW ECC data"); 7359 return 0; 7360 } 7361 7362 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7363 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7364 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7365 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7366 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7367 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7368 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7369 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7370 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7371 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7372 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7373 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7374 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7375 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7376 7377 dev_err(hdev->dev, 7378 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7379 device, ch, wr_par, rd_par, ca_par, serr, derr); 7380 dev_err(hdev->dev, 7381 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7382 device, ch, hbm_ecc_data->first_addr, type, 7383 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7384 hbm_ecc_data->dec_cnt); 7385 return 0; 7386 } 7387 7388 if (hdev->asic_prop.fw_security_enabled) { 7389 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7390 return 0; 7391 } 7392 7393 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7394 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7395 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7396 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7397 if (val) { 7398 rc = -EIO; 7399 dev_err(hdev->dev, 7400 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7401 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7402 (val >> 2) & 0x1, (val >> 3) & 0x1, 7403 (val >> 4) & 0x1); 7404 7405 val2 = RREG32(base + ch * 0x1000 + 0x060); 7406 dev_err(hdev->dev, 7407 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7408 device, ch * 2, 7409 RREG32(base + ch * 0x1000 + 0x064), 7410 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7411 (val2 & 0xFF0000) >> 16, 7412 (val2 & 0xFF000000) >> 24); 7413 } 7414 7415 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7416 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7417 if (val) { 7418 rc = -EIO; 7419 dev_err(hdev->dev, 7420 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7421 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7422 (val >> 2) & 0x1, (val >> 3) & 0x1, 7423 (val >> 4) & 0x1); 7424 7425 val2 = RREG32(base + ch * 0x1000 + 0x070); 7426 dev_err(hdev->dev, 7427 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7428 device, ch * 2 + 1, 7429 RREG32(base + ch * 0x1000 + 0x074), 7430 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7431 (val2 & 0xFF0000) >> 16, 7432 (val2 & 0xFF000000) >> 24); 7433 } 7434 7435 /* Clear interrupts */ 7436 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7437 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7438 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7439 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7440 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7441 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7442 } 7443 7444 val = RREG32(base + 0x8F30); 7445 val2 = RREG32(base + 0x8F34); 7446 if (val | val2) { 7447 rc = -EIO; 7448 dev_err(hdev->dev, 7449 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7450 device, val, val2); 7451 } 7452 val = RREG32(base + 0x8F40); 7453 val2 = RREG32(base + 0x8F44); 7454 if (val | val2) { 7455 rc = -EIO; 7456 dev_err(hdev->dev, 7457 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7458 device, val, val2); 7459 } 7460 7461 return rc; 7462 } 
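
/*
 * Illustrative sketch, not part of the driver (kept under #if 0): the
 * register path above reads a packed per-channel HBM status word, folds its
 * low and high bytes with (val & 0xFF) | ((val >> 8) & 0xFF) and then tests
 * the individual flags. Going by the bit positions used in the dev_err()
 * calls above, bit 0 is WR_PAR, bit 1 RD_PAR, bit 2 CA_PAR, bit 3 SERR and
 * bit 4 DERR. The helper name below is made up for the example.
 */
#if 0
static void example_decode_hbm_ch_status(u32 raw_val)
{
	u32 folded = (raw_val & 0xFF) | ((raw_val >> 8) & 0xFF);

	pr_info("example HBM flags: WR_PAR=%u RD_PAR=%u CA_PAR=%u SERR=%u DERR=%u\n",
		folded & 0x1, (folded >> 1) & 0x1, (folded >> 2) & 0x1,
		(folded >> 3) & 0x1, (folded >> 4) & 0x1);
}
#endif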
7463 7464 static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7465 { 7466 switch (hbm_event_type) { 7467 case GAUDI_EVENT_HBM0_SPI_0: 7468 case GAUDI_EVENT_HBM0_SPI_1: 7469 return 0; 7470 case GAUDI_EVENT_HBM1_SPI_0: 7471 case GAUDI_EVENT_HBM1_SPI_1: 7472 return 1; 7473 case GAUDI_EVENT_HBM2_SPI_0: 7474 case GAUDI_EVENT_HBM2_SPI_1: 7475 return 2; 7476 case GAUDI_EVENT_HBM3_SPI_0: 7477 case GAUDI_EVENT_HBM3_SPI_1: 7478 return 3; 7479 default: 7480 break; 7481 } 7482 7483 /* Should never happen */ 7484 return 0; 7485 } 7486 7487 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7488 char *interrupt_name) 7489 { 7490 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7491 bool soft_reset_required = false; 7492 7493 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7494 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7495 7496 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7497 if (tpc_interrupts_cause & BIT(i)) { 7498 dev_err_ratelimited(hdev->dev, 7499 "TPC%d_%s interrupt cause: %s\n", 7500 tpc_id, interrupt_name, 7501 gaudi_tpc_interrupts_cause[i]); 7502 /* If this is QM error, we need to soft-reset */ 7503 if (i == 15) 7504 soft_reset_required = true; 7505 } 7506 7507 /* Clear interrupts */ 7508 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7509 7510 return soft_reset_required; 7511 } 7512 7513 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7514 { 7515 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7516 } 7517 7518 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7519 { 7520 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7521 } 7522 7523 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7524 { 7525 ktime_t zero_time = ktime_set(0, 0); 7526 7527 mutex_lock(&hdev->clk_throttling.lock); 7528 7529 switch (event_type) { 7530 case GAUDI_EVENT_FIX_POWER_ENV_S: 7531 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7532 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7533 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7534 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7535 dev_info_ratelimited(hdev->dev, 7536 "Clock throttling due to power consumption\n"); 7537 break; 7538 7539 case GAUDI_EVENT_FIX_POWER_ENV_E: 7540 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7541 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7542 dev_info_ratelimited(hdev->dev, 7543 "Power envelop is safe, back to optimal clock\n"); 7544 break; 7545 7546 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7547 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7548 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7549 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7550 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7551 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7552 dev_info_ratelimited(hdev->dev, 7553 "Clock throttling due to overheating\n"); 7554 break; 7555 7556 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7557 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7558 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7559 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7560 dev_info_ratelimited(hdev->dev, 7561 "Thermal envelop is safe, back to optimal clock\n"); 7562 break; 7563 7564 default: 7565 
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7566 event_type); 7567 break; 7568 } 7569 7570 mutex_unlock(&hdev->clk_throttling.lock); 7571 } 7572 7573 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7574 { 7575 struct gaudi_device *gaudi = hdev->asic_specific; 7576 struct hl_info_fw_err_info fw_err_info; 7577 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7578 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7579 u32 fw_fatal_err_flag = 0, flags = 0; 7580 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7581 >> EQ_CTL_EVENT_TYPE_SHIFT); 7582 bool reset_required, reset_direct = false; 7583 u8 cause; 7584 int rc; 7585 7586 if (event_type >= GAUDI_EVENT_SIZE) { 7587 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7588 event_type, GAUDI_EVENT_SIZE - 1); 7589 return; 7590 } 7591 7592 gaudi->events_stat[event_type]++; 7593 gaudi->events_stat_aggregate[event_type]++; 7594 7595 switch (event_type) { 7596 case GAUDI_EVENT_PCIE_CORE_DERR: 7597 case GAUDI_EVENT_PCIE_IF_DERR: 7598 case GAUDI_EVENT_PCIE_PHY_DERR: 7599 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7600 case GAUDI_EVENT_MME0_ACC_DERR: 7601 case GAUDI_EVENT_MME0_SBAB_DERR: 7602 case GAUDI_EVENT_MME1_ACC_DERR: 7603 case GAUDI_EVENT_MME1_SBAB_DERR: 7604 case GAUDI_EVENT_MME2_ACC_DERR: 7605 case GAUDI_EVENT_MME2_SBAB_DERR: 7606 case GAUDI_EVENT_MME3_ACC_DERR: 7607 case GAUDI_EVENT_MME3_SBAB_DERR: 7608 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7609 fallthrough; 7610 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7611 case GAUDI_EVENT_PSOC_MEM_DERR: 7612 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7613 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7614 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7615 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7616 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7617 case GAUDI_EVENT_MMU_DERR: 7618 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7619 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7620 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7621 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7622 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7623 goto reset_device; 7624 7625 case GAUDI_EVENT_GIC500: 7626 case GAUDI_EVENT_AXI_ECC: 7627 case GAUDI_EVENT_L2_RAM_ECC: 7628 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7629 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7630 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7631 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7632 goto reset_device; 7633 7634 case GAUDI_EVENT_HBM0_SPI_0: 7635 case GAUDI_EVENT_HBM1_SPI_0: 7636 case GAUDI_EVENT_HBM2_SPI_0: 7637 case GAUDI_EVENT_HBM3_SPI_0: 7638 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7639 gaudi_hbm_read_interrupts(hdev, 7640 gaudi_hbm_event_to_dev(event_type), 7641 &eq_entry->hbm_ecc_data); 7642 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7643 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7644 goto reset_device; 7645 7646 case GAUDI_EVENT_HBM0_SPI_1: 7647 case GAUDI_EVENT_HBM1_SPI_1: 7648 case GAUDI_EVENT_HBM2_SPI_1: 7649 case GAUDI_EVENT_HBM3_SPI_1: 7650 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7651 gaudi_hbm_read_interrupts(hdev, 7652 gaudi_hbm_event_to_dev(event_type), 7653 &eq_entry->hbm_ecc_data); 7654 hl_fw_unmask_irq(hdev, event_type); 7655 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7656 break; 7657 7658 case GAUDI_EVENT_TPC0_DEC: 7659 case GAUDI_EVENT_TPC1_DEC: 7660 case GAUDI_EVENT_TPC2_DEC: 7661 case GAUDI_EVENT_TPC3_DEC: 7662 case GAUDI_EVENT_TPC4_DEC: 7663 case GAUDI_EVENT_TPC5_DEC: 7664 case GAUDI_EVENT_TPC6_DEC: 7665 case GAUDI_EVENT_TPC7_DEC: 7666 /* In TPC DEC event, notify on TPC assertion. While there isn't 7667 * a specific event for assertion yet, the FW generates TPC DEC event. 7668 * The SW upper layer will inspect an internal mapped area to indicate 7669 * if the event is a TPC Assertion or a "real" TPC DEC. 7670 */ 7671 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7672 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7673 reset_required = gaudi_tpc_read_interrupts(hdev, 7674 tpc_dec_event_to_tpc_id(event_type), 7675 "AXI_SLV_DEC_Error"); 7676 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7677 if (reset_required) { 7678 dev_err(hdev->dev, "reset required due to %s\n", 7679 gaudi_irq_map_table[event_type].name); 7680 7681 reset_direct = true; 7682 goto reset_device; 7683 } else { 7684 hl_fw_unmask_irq(hdev, event_type); 7685 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7686 } 7687 break; 7688 7689 case GAUDI_EVENT_TPC0_KRN_ERR: 7690 case GAUDI_EVENT_TPC1_KRN_ERR: 7691 case GAUDI_EVENT_TPC2_KRN_ERR: 7692 case GAUDI_EVENT_TPC3_KRN_ERR: 7693 case GAUDI_EVENT_TPC4_KRN_ERR: 7694 case GAUDI_EVENT_TPC5_KRN_ERR: 7695 case GAUDI_EVENT_TPC6_KRN_ERR: 7696 case GAUDI_EVENT_TPC7_KRN_ERR: 7697 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7698 reset_required = gaudi_tpc_read_interrupts(hdev, 7699 tpc_krn_event_to_tpc_id(event_type), 7700 "KRN_ERR"); 7701 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7702 if (reset_required) { 7703 dev_err(hdev->dev, "reset required due to %s\n", 7704 gaudi_irq_map_table[event_type].name); 7705 7706 reset_direct = true; 7707 goto reset_device; 7708 } else { 7709 hl_fw_unmask_irq(hdev, event_type); 7710 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7711 } 7712 break; 7713 7714 case GAUDI_EVENT_PCIE_CORE_SERR: 7715 case GAUDI_EVENT_PCIE_IF_SERR: 7716 case GAUDI_EVENT_PCIE_PHY_SERR: 7717 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7718 case GAUDI_EVENT_MME0_ACC_SERR: 7719 case GAUDI_EVENT_MME0_SBAB_SERR: 7720 case GAUDI_EVENT_MME1_ACC_SERR: 7721 case GAUDI_EVENT_MME1_SBAB_SERR: 7722 case GAUDI_EVENT_MME2_ACC_SERR: 7723 case GAUDI_EVENT_MME2_SBAB_SERR: 7724 case GAUDI_EVENT_MME3_ACC_SERR: 7725 case GAUDI_EVENT_MME3_SBAB_SERR: 7726 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7727 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7728 case GAUDI_EVENT_PSOC_MEM_SERR: 7729 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7730 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7731 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7732 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7733 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7734 fallthrough; 7735 case GAUDI_EVENT_MMU_SERR: 7736 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7737 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7738 hl_fw_unmask_irq(hdev, event_type); 7739 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7740 break; 7741 7742 case GAUDI_EVENT_PCIE_DEC: 7743 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7744 case GAUDI_EVENT_PSOC_AXI_DEC: 7745 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7746 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7747 hl_fw_unmask_irq(hdev, event_type); 7748 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7749 break; 7750 7751 case GAUDI_EVENT_MMU_PAGE_FAULT: 7752 case GAUDI_EVENT_MMU_WR_PERM: 7753 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7754 hl_fw_unmask_irq(hdev, event_type); 7755 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7756 break; 7757 7758 case GAUDI_EVENT_MME0_WBC_RSP: 7759 case GAUDI_EVENT_MME0_SBAB0_RSP: 7760 case GAUDI_EVENT_MME1_WBC_RSP: 7761 case GAUDI_EVENT_MME1_SBAB0_RSP: 7762 case GAUDI_EVENT_MME2_WBC_RSP: 7763 case GAUDI_EVENT_MME2_SBAB0_RSP: 7764 case GAUDI_EVENT_MME3_WBC_RSP: 7765 case GAUDI_EVENT_MME3_SBAB0_RSP: 7766 case GAUDI_EVENT_RAZWI_OR_ADC: 7767 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7768 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7769 fallthrough; 7770 case GAUDI_EVENT_NIC0_QM0: 7771 case GAUDI_EVENT_NIC0_QM1: 7772 case GAUDI_EVENT_NIC1_QM0: 7773 case GAUDI_EVENT_NIC1_QM1: 7774 case GAUDI_EVENT_NIC2_QM0: 7775 case GAUDI_EVENT_NIC2_QM1: 7776 case GAUDI_EVENT_NIC3_QM0: 7777 case GAUDI_EVENT_NIC3_QM1: 7778 case GAUDI_EVENT_NIC4_QM0: 7779 case GAUDI_EVENT_NIC4_QM1: 7780 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7781 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7782 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7783 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7784 hl_fw_unmask_irq(hdev, event_type); 7785 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7786 break; 7787 7788 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7789 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7790 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7791 goto reset_device; 7792 7793 case GAUDI_EVENT_TPC0_BMON_SPMU: 7794 case GAUDI_EVENT_TPC1_BMON_SPMU: 7795 case GAUDI_EVENT_TPC2_BMON_SPMU: 7796 case GAUDI_EVENT_TPC3_BMON_SPMU: 7797 case GAUDI_EVENT_TPC4_BMON_SPMU: 7798 case GAUDI_EVENT_TPC5_BMON_SPMU: 7799 case GAUDI_EVENT_TPC6_BMON_SPMU: 7800 case GAUDI_EVENT_TPC7_BMON_SPMU: 7801 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7802 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7803 hl_fw_unmask_irq(hdev, event_type); 7804 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7805 break; 7806 7807 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7808 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7809 hl_fw_unmask_irq(hdev, event_type); 7810 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7811 break; 7812 7813 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7814 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7815 gaudi_print_sm_sei_info(hdev, event_type, 7816 &eq_entry->sm_sei_data); 7817 rc = hl_state_dump(hdev); 7818 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7819 if (rc) 7820 dev_err(hdev->dev, 7821 "Error during system state dump %d\n", rc); 7822 hl_fw_unmask_irq(hdev, event_type); 7823 break; 7824 7825 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7826 break; 7827 7828 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7829 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7830 hl_fw_unmask_irq(hdev, event_type); 7831 break; 7832 7833 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7834 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7835 dev_err(hdev->dev, 7836 "Received high temp H/W interrupt %d (cause %d)\n", 7837 event_type, cause); 7838 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7839 break; 7840 7841 case GAUDI_EVENT_DEV_RESET_REQ: 7842 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7843 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7844 goto reset_device; 7845 7846 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7847 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7848 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7849 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7850 goto reset_device; 7851 7852 case GAUDI_EVENT_FW_ALIVE_S: 7853 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7854 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7855 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7856 fw_err_info.event_id = event_type; 7857 fw_err_info.event_mask = &event_mask; 7858 hl_handle_fw_err(hdev, &fw_err_info); 7859 goto reset_device; 7860 7861 default: 7862 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7863 event_type); 7864 break; 7865 } 7866 7867 if (event_mask) 7868 hl_notifier_event_send_all(hdev, event_mask); 7869 7870 return; 7871 7872 reset_device: 7873 reset_required = true; 7874 7875 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7876 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7877 7878 /* notify on device unavailable while the reset triggered by fw */ 7879 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7880 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7881 } else if (hdev->hard_reset_on_fw_events) { 7882 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7883 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7884 } else { 7885 reset_required = false; 7886 } 7887 7888 if (reset_required) { 7889 /* escalate general hw errors to critical/fatal error */ 7890 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7891 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7892 7893 hl_device_cond_reset(hdev, flags, event_mask); 7894 } else { 7895 hl_fw_unmask_irq(hdev, event_type); 7896 /* Notification on occurred event needs to be sent although reset is not executed */ 7897 if (event_mask) 7898 hl_notifier_event_send_all(hdev, event_mask); 7899 } 7900 } 7901 7902 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7903 { 7904 struct gaudi_device *gaudi = hdev->asic_specific; 7905 7906 if (aggregate) { 7907 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7908 
return gaudi->events_stat_aggregate; 7909 } 7910 7911 *size = (u32) sizeof(gaudi->events_stat); 7912 return gaudi->events_stat; 7913 } 7914 7915 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7916 { 7917 struct gaudi_device *gaudi = hdev->asic_specific; 7918 u32 status, timeout_usec; 7919 int rc; 7920 7921 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7922 hdev->reset_info.hard_reset_pending) 7923 return 0; 7924 7925 if (hdev->pldm) 7926 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7927 else 7928 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7929 7930 /* L0 & L1 invalidation */ 7931 WREG32(mmSTLB_INV_PS, 3); 7932 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7933 WREG32(mmSTLB_INV_PS, 2); 7934 7935 rc = hl_poll_timeout( 7936 hdev, 7937 mmSTLB_INV_PS, 7938 status, 7939 !status, 7940 1000, 7941 timeout_usec); 7942 7943 WREG32(mmSTLB_INV_SET, 0); 7944 7945 return rc; 7946 } 7947 7948 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 7949 bool is_hard, u32 flags, 7950 u32 asid, u64 va, u64 size) 7951 { 7952 /* Treat as invalidate all because there is no range invalidation 7953 * in Gaudi 7954 */ 7955 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 7956 } 7957 7958 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 7959 { 7960 u32 status, timeout_usec; 7961 int rc; 7962 7963 if (hdev->pldm) 7964 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7965 else 7966 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7967 7968 WREG32(MMU_ASID, asid); 7969 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 7970 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 7971 WREG32(MMU_BUSY, 0x80000000); 7972 7973 rc = hl_poll_timeout( 7974 hdev, 7975 MMU_BUSY, 7976 status, 7977 !(status & 0x80000000), 7978 1000, 7979 timeout_usec); 7980 7981 if (rc) { 7982 dev_err(hdev->dev, 7983 "Timeout during MMU hop0 config of asid %d\n", asid); 7984 return rc; 7985 } 7986 7987 return 0; 7988 } 7989 7990 static int gaudi_send_heartbeat(struct hl_device *hdev) 7991 { 7992 struct gaudi_device *gaudi = hdev->asic_specific; 7993 7994 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7995 return 0; 7996 7997 return hl_fw_send_heartbeat(hdev); 7998 } 7999 8000 static int gaudi_cpucp_info_get(struct hl_device *hdev) 8001 { 8002 struct gaudi_device *gaudi = hdev->asic_specific; 8003 struct asic_fixed_properties *prop = &hdev->asic_prop; 8004 int rc; 8005 8006 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8007 return 0; 8008 8009 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 8010 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 8011 mmCPU_BOOT_ERR1); 8012 if (rc) 8013 return rc; 8014 8015 if (!strlen(prop->cpucp_info.card_name)) 8016 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 8017 CARD_NAME_MAX_LEN); 8018 8019 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8020 8021 set_default_power_values(hdev); 8022 8023 return 0; 8024 } 8025 8026 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8027 struct engines_data *e) 8028 { 8029 struct gaudi_device *gaudi = hdev->asic_specific; 8030 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8031 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8032 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8033 unsigned long *mask = (unsigned long *)mask_arr; 8034 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8035 bool is_idle = true, is_eng_idle, is_slave; 8036 u64 offset; 8037 
int i, dma_id, port; 8038 8039 if (e) 8040 hl_engine_data_sprintf(e, 8041 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8042 "--- ------- ------------ ---------- -------------\n"); 8043 8044 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8045 dma_id = gaudi_dma_assignment[i]; 8046 offset = dma_id * DMA_QMAN_OFFSET; 8047 8048 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8049 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8050 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8051 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8052 IS_DMA_IDLE(dma_core_sts0); 8053 is_idle &= is_eng_idle; 8054 8055 if (mask && !is_eng_idle) 8056 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8057 if (e) 8058 hl_engine_data_sprintf(e, fmt, dma_id, 8059 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8060 qm_cgm_sts, dma_core_sts0); 8061 } 8062 8063 if (e) 8064 hl_engine_data_sprintf(e, 8065 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8066 "--- ------- ------------ ---------- ----------\n"); 8067 8068 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8069 offset = i * TPC_QMAN_OFFSET; 8070 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8071 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8072 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8073 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8074 IS_TPC_IDLE(tpc_cfg_sts); 8075 is_idle &= is_eng_idle; 8076 8077 if (mask && !is_eng_idle) 8078 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8079 if (e) 8080 hl_engine_data_sprintf(e, fmt, i, 8081 is_eng_idle ? "Y" : "N", 8082 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8083 } 8084 8085 if (e) 8086 hl_engine_data_sprintf(e, 8087 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8088 "--- ------- ------------ ---------- -----------\n"); 8089 8090 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8091 offset = i * MME_QMAN_OFFSET; 8092 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8093 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8094 8095 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8096 is_slave = i % 2; 8097 if (!is_slave) { 8098 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8099 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8100 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8101 } 8102 8103 is_idle &= is_eng_idle; 8104 8105 if (mask && !is_eng_idle) 8106 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8107 if (e) { 8108 if (!is_slave) 8109 hl_engine_data_sprintf(e, fmt, i, 8110 is_eng_idle ? "Y" : "N", 8111 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8112 else 8113 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8114 is_eng_idle ? "Y" : "N", "-", 8115 "-", mme_arch_sts); 8116 } 8117 } 8118 8119 if (e) 8120 hl_engine_data_sprintf(e, 8121 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8122 "--- ------- ------------ ----------\n"); 8123 8124 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8125 offset = i * NIC_MACRO_QMAN_OFFSET; 8126 port = 2 * i; 8127 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8128 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8129 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8130 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8131 is_idle &= is_eng_idle; 8132 8133 if (mask && !is_eng_idle) 8134 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8135 if (e) 8136 hl_engine_data_sprintf(e, nic_fmt, port, 8137 is_eng_idle ? 
"Y" : "N", 8138 qm_glbl_sts0, qm_cgm_sts); 8139 } 8140 8141 port = 2 * i + 1; 8142 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8143 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8144 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8145 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8146 is_idle &= is_eng_idle; 8147 8148 if (mask && !is_eng_idle) 8149 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8150 if (e) 8151 hl_engine_data_sprintf(e, nic_fmt, port, 8152 is_eng_idle ? "Y" : "N", 8153 qm_glbl_sts0, qm_cgm_sts); 8154 } 8155 } 8156 8157 if (e) 8158 hl_engine_data_sprintf(e, "\n"); 8159 8160 return is_idle; 8161 } 8162 8163 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8164 __acquires(&gaudi->hw_queues_lock) 8165 { 8166 struct gaudi_device *gaudi = hdev->asic_specific; 8167 8168 spin_lock(&gaudi->hw_queues_lock); 8169 } 8170 8171 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8172 __releases(&gaudi->hw_queues_lock) 8173 { 8174 struct gaudi_device *gaudi = hdev->asic_specific; 8175 8176 spin_unlock(&gaudi->hw_queues_lock); 8177 } 8178 8179 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8180 { 8181 return hdev->pdev->device; 8182 } 8183 8184 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8185 size_t max_size) 8186 { 8187 struct gaudi_device *gaudi = hdev->asic_specific; 8188 8189 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8190 return 0; 8191 8192 return hl_fw_get_eeprom_data(hdev, data, max_size); 8193 } 8194 8195 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8196 { 8197 struct gaudi_device *gaudi = hdev->asic_specific; 8198 8199 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8200 return 0; 8201 8202 return hl_fw_get_monitor_dump(hdev, data); 8203 } 8204 8205 /* 8206 * this function should be used only during initialization and/or after reset, 8207 * when there are no active users. 
8208 */ 8209 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8210 { 8211 u64 kernel_timeout; 8212 u32 status, offset; 8213 int rc; 8214 8215 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8216 8217 if (hdev->pldm) 8218 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8219 else 8220 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8221 8222 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8223 lower_32_bits(tpc_kernel)); 8224 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8225 upper_32_bits(tpc_kernel)); 8226 8227 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8228 lower_32_bits(tpc_kernel)); 8229 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8230 upper_32_bits(tpc_kernel)); 8231 /* set a valid LUT pointer, content is of no significance */ 8232 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8233 lower_32_bits(tpc_kernel)); 8234 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8235 upper_32_bits(tpc_kernel)); 8236 8237 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8238 lower_32_bits(CFG_BASE + 8239 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8240 8241 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8242 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8243 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8244 /* wait a bit for the engine to start executing */ 8245 usleep_range(1000, 1500); 8246 8247 /* wait until engine has finished executing */ 8248 rc = hl_poll_timeout( 8249 hdev, 8250 mmTPC0_CFG_STATUS + offset, 8251 status, 8252 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8253 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8254 1000, 8255 kernel_timeout); 8256 8257 if (rc) { 8258 dev_err(hdev->dev, 8259 "Timeout while waiting for TPC%d icache prefetch\n", 8260 tpc_id); 8261 return -EIO; 8262 } 8263 8264 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8265 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8266 8267 /* wait a bit for the engine to start executing */ 8268 usleep_range(1000, 1500); 8269 8270 /* wait until engine has finished executing */ 8271 rc = hl_poll_timeout( 8272 hdev, 8273 mmTPC0_CFG_STATUS + offset, 8274 status, 8275 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8276 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8277 1000, 8278 kernel_timeout); 8279 8280 if (rc) { 8281 dev_err(hdev->dev, 8282 "Timeout while waiting for TPC%d vector pipe\n", 8283 tpc_id); 8284 return -EIO; 8285 } 8286 8287 rc = hl_poll_timeout( 8288 hdev, 8289 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8290 status, 8291 (status == 0), 8292 1000, 8293 kernel_timeout); 8294 8295 if (rc) { 8296 dev_err(hdev->dev, 8297 "Timeout while waiting for TPC%d kernel to execute\n", 8298 tpc_id); 8299 return -EIO; 8300 } 8301 8302 return 0; 8303 } 8304 8305 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8306 struct hl_ctx *ctx) 8307 { 8308 struct gaudi_device *gaudi = hdev->asic_specific; 8309 int min_alloc_order, rc, collective_cb_size; 8310 8311 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8312 return 0; 8313 8314 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8315 HOST_SPACE_INTERNAL_CB_SZ, 8316 &hdev->internal_cb_pool_dma_addr, 8317 GFP_KERNEL | __GFP_ZERO); 8318 8319 if (!hdev->internal_cb_pool_virt_addr) 8320 return -ENOMEM; 8321 8322 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8323 sizeof(struct packet_fence); 8324 min_alloc_order = ilog2(collective_cb_size); 8325 8326 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8327 if (!hdev->internal_cb_pool) { 8328 dev_err(hdev->dev, 8329 "Failed to 
create internal CB pool\n"); 8330 rc = -ENOMEM; 8331 goto free_internal_cb_pool; 8332 } 8333 8334 rc = gen_pool_add(hdev->internal_cb_pool, 8335 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8336 HOST_SPACE_INTERNAL_CB_SZ, -1); 8337 if (rc) { 8338 dev_err(hdev->dev, 8339 "Failed to add memory to internal CB pool\n"); 8340 rc = -EFAULT; 8341 goto destroy_internal_cb_pool; 8342 } 8343 8344 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8345 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8346 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8347 8348 if (!hdev->internal_cb_va_base) { 8349 rc = -ENOMEM; 8350 goto destroy_internal_cb_pool; 8351 } 8352 8353 mutex_lock(&hdev->mmu_lock); 8354 8355 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8356 hdev->internal_cb_pool_dma_addr, 8357 HOST_SPACE_INTERNAL_CB_SZ); 8358 if (rc) 8359 goto unreserve_internal_cb_pool; 8360 8361 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8362 if (rc) 8363 goto unmap_internal_cb_pool; 8364 8365 mutex_unlock(&hdev->mmu_lock); 8366 8367 return 0; 8368 8369 unmap_internal_cb_pool: 8370 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8371 HOST_SPACE_INTERNAL_CB_SZ); 8372 unreserve_internal_cb_pool: 8373 mutex_unlock(&hdev->mmu_lock); 8374 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8375 HOST_SPACE_INTERNAL_CB_SZ); 8376 destroy_internal_cb_pool: 8377 gen_pool_destroy(hdev->internal_cb_pool); 8378 free_internal_cb_pool: 8379 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8380 hdev->internal_cb_pool_dma_addr); 8381 8382 return rc; 8383 } 8384 8385 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8386 struct hl_ctx *ctx) 8387 { 8388 struct gaudi_device *gaudi = hdev->asic_specific; 8389 8390 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8391 return; 8392 8393 mutex_lock(&hdev->mmu_lock); 8394 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8395 HOST_SPACE_INTERNAL_CB_SZ); 8396 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8397 HOST_SPACE_INTERNAL_CB_SZ); 8398 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8399 mutex_unlock(&hdev->mmu_lock); 8400 8401 gen_pool_destroy(hdev->internal_cb_pool); 8402 8403 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8404 hdev->internal_cb_pool_dma_addr); 8405 } 8406 8407 static int gaudi_ctx_init(struct hl_ctx *ctx) 8408 { 8409 int rc; 8410 8411 if (ctx->asid == HL_KERNEL_ASID_ID) 8412 return 0; 8413 8414 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8415 if (rc) 8416 return rc; 8417 8418 rc = gaudi_restore_user_registers(ctx->hdev); 8419 if (rc) 8420 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8421 8422 return rc; 8423 } 8424 8425 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8426 { 8427 if (ctx->asid == HL_KERNEL_ASID_ID) 8428 return; 8429 8430 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8431 } 8432 8433 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8434 { 8435 return 0; 8436 } 8437 8438 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8439 { 8440 return gaudi_cq_assignment[cq_idx]; 8441 } 8442 8443 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8444 { 8445 return sizeof(struct packet_msg_short) + 8446 sizeof(struct packet_msg_prot) * 2; 8447 } 8448 8449 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8450 { 8451 return sizeof(struct packet_msg_short) * 4 + 8452 sizeof(struct packet_fence) + 8453 sizeof(struct packet_msg_prot) * 2; 8454 } 8455 8456 
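
/*
 * Illustrative sketch, not part of the driver (kept under #if 0): the CB
 * sizes returned above mirror the packets generated below - a signal CB is
 * one MSG_SHORT plus two MSG_PROT packets, and a wait CB is four MSG_SHORT
 * packets, one FENCE and two MSG_PROT packets. Each packet's 32-bit ctl word
 * is assembled field by field with FIELD_PREP(), as gaudi_gen_signal_cb()
 * does next. The EXAMPLE_* masks below use made-up GENMASK() positions; they
 * are not the real GAUDI_PKT_* layout.
 */
#if 0
#define EXAMPLE_CTL_ADDR_MASK		GENMASK(15, 0)
#define EXAMPLE_CTL_OPCODE_MASK		GENMASK(28, 24)
#define EXAMPLE_CTL_EB_MASK		GENMASK(29, 29)
#define EXAMPLE_CTL_MB_MASK		GENMASK(31, 31)

static u32 example_build_ctl(u16 addr, u8 opcode, bool eb)
{
	u32 ctl;

	ctl = FIELD_PREP(EXAMPLE_CTL_ADDR_MASK, addr);
	ctl |= FIELD_PREP(EXAMPLE_CTL_OPCODE_MASK, opcode);
	ctl |= FIELD_PREP(EXAMPLE_CTL_EB_MASK, eb);
	ctl |= FIELD_PREP(EXAMPLE_CTL_MB_MASK, 1);

	return ctl;
}
#endif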
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8457 { 8458 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8459 } 8460 8461 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8462 u32 size, bool eb) 8463 { 8464 struct hl_cb *cb = (struct hl_cb *) data; 8465 struct packet_msg_short *pkt; 8466 u32 value, ctl, pkt_size = sizeof(*pkt); 8467 8468 pkt = cb->kernel_address + size; 8469 memset(pkt, 0, pkt_size); 8470 8471 /* Inc by 1, Mode ADD */ 8472 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8473 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8474 8475 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8476 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8477 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8478 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8479 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8480 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8481 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8482 8483 pkt->value = cpu_to_le32(value); 8484 pkt->ctl = cpu_to_le32(ctl); 8485 8486 return size + pkt_size; 8487 } 8488 8489 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8490 u16 addr) 8491 { 8492 u32 ctl, pkt_size = sizeof(*pkt); 8493 8494 memset(pkt, 0, pkt_size); 8495 8496 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8497 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8498 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8499 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8500 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8501 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8502 8503 pkt->value = cpu_to_le32(value); 8504 pkt->ctl = cpu_to_le32(ctl); 8505 8506 return pkt_size; 8507 } 8508 8509 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8510 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8511 u16 sob_val, u16 mon_id) 8512 { 8513 u64 monitor_base; 8514 u32 ctl, value, pkt_size = sizeof(*pkt); 8515 u16 msg_addr_offset; 8516 u8 mask; 8517 8518 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8519 dev_err(hdev->dev, 8520 "sob_base %u (mask %#x) is not valid\n", 8521 sob_base, sob_mask); 8522 return 0; 8523 } 8524 8525 /* 8526 * monitor_base should be the content of the base0 address registers, 8527 * so it will be added to the msg short offsets 8528 */ 8529 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8530 8531 msg_addr_offset = 8532 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8533 monitor_base; 8534 8535 memset(pkt, 0, pkt_size); 8536 8537 /* Monitor config packet: bind the monitor to a sync object */ 8538 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8539 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8540 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8541 0); /* GREATER OR EQUAL*/ 8542 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8543 8544 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8545 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8546 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8547 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8548 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8549 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8550 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8551 8552 pkt->value = 
cpu_to_le32(value); 8553 pkt->ctl = cpu_to_le32(ctl); 8554 8555 return pkt_size; 8556 } 8557 8558 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8559 { 8560 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8561 8562 memset(pkt, 0, pkt_size); 8563 8564 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8565 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8566 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8567 8568 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8569 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8570 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8571 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8572 8573 pkt->cfg = cpu_to_le32(cfg); 8574 pkt->ctl = cpu_to_le32(ctl); 8575 8576 return pkt_size; 8577 } 8578 8579 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8580 { 8581 u32 offset, nic_index; 8582 8583 switch (queue_id) { 8584 case GAUDI_QUEUE_ID_DMA_0_0: 8585 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8586 break; 8587 case GAUDI_QUEUE_ID_DMA_0_1: 8588 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8589 break; 8590 case GAUDI_QUEUE_ID_DMA_0_2: 8591 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8592 break; 8593 case GAUDI_QUEUE_ID_DMA_0_3: 8594 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8595 break; 8596 case GAUDI_QUEUE_ID_DMA_1_0: 8597 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8598 break; 8599 case GAUDI_QUEUE_ID_DMA_1_1: 8600 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8601 break; 8602 case GAUDI_QUEUE_ID_DMA_1_2: 8603 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8604 break; 8605 case GAUDI_QUEUE_ID_DMA_1_3: 8606 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8607 break; 8608 case GAUDI_QUEUE_ID_DMA_5_0: 8609 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8610 break; 8611 case GAUDI_QUEUE_ID_DMA_5_1: 8612 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8613 break; 8614 case GAUDI_QUEUE_ID_DMA_5_2: 8615 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8616 break; 8617 case GAUDI_QUEUE_ID_DMA_5_3: 8618 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8619 break; 8620 case GAUDI_QUEUE_ID_TPC_7_0: 8621 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8622 break; 8623 case GAUDI_QUEUE_ID_TPC_7_1: 8624 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8625 break; 8626 case GAUDI_QUEUE_ID_TPC_7_2: 8627 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8628 break; 8629 case GAUDI_QUEUE_ID_TPC_7_3: 8630 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8631 break; 8632 case GAUDI_QUEUE_ID_NIC_0_0: 8633 case GAUDI_QUEUE_ID_NIC_1_0: 8634 case GAUDI_QUEUE_ID_NIC_2_0: 8635 case GAUDI_QUEUE_ID_NIC_3_0: 8636 case GAUDI_QUEUE_ID_NIC_4_0: 8637 case GAUDI_QUEUE_ID_NIC_5_0: 8638 case GAUDI_QUEUE_ID_NIC_6_0: 8639 case GAUDI_QUEUE_ID_NIC_7_0: 8640 case GAUDI_QUEUE_ID_NIC_8_0: 8641 case GAUDI_QUEUE_ID_NIC_9_0: 8642 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8643 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8644 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8645 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8646 break; 8647 case GAUDI_QUEUE_ID_NIC_0_1: 8648 case GAUDI_QUEUE_ID_NIC_1_1: 8649 case GAUDI_QUEUE_ID_NIC_2_1: 8650 case GAUDI_QUEUE_ID_NIC_3_1: 8651 case GAUDI_QUEUE_ID_NIC_4_1: 8652 case GAUDI_QUEUE_ID_NIC_5_1: 8653 case GAUDI_QUEUE_ID_NIC_6_1: 8654 case GAUDI_QUEUE_ID_NIC_7_1: 8655 case GAUDI_QUEUE_ID_NIC_8_1: 8656 case GAUDI_QUEUE_ID_NIC_9_1: 8657 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8658 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8659 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8660 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8661 break; 8662 case GAUDI_QUEUE_ID_NIC_0_2: 8663 case GAUDI_QUEUE_ID_NIC_1_2: 8664 case GAUDI_QUEUE_ID_NIC_2_2: 8665 case 

static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
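
/*
 * Illustrative summary (not taken from the original source): together with
 * the helpers above, gaudi_gen_wait_cb() below emits five packets per wait:
 * three MSG_SHORTs that program the monitor payload address (low/high) and
 * payload data (1), one MSG_SHORT that arms the monitor against the sync
 * object group, and a final FENCE packet that blocks the queue until the
 * armed monitor writes the payload into the queue's CP_FENCE2_RDATA
 * register resolved by gaudi_get_fence_addr().
 */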

static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
			hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
			u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* The reg value holds a partial address of the sync object and is
	 * used as a unique identifier, so the cut-off cfg base bits must be
	 * removed from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
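
/*
 * Illustrative sketch (not part of the driver): an entry added above is keyed
 * by its sync_id, so a consumer of the map could resolve a sync id back to an
 * engine roughly as follows, assuming <linux/hashtable.h> bucket iteration.
 */
#if 0
static struct hl_sync_to_engine_map_entry *
gaudi_lookup_sync_entry(struct hl_sync_to_engine_map *map, u32 sync_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Walk only the bucket that hash_add() placed the key in */
	hash_for_each_possible(map->tb, entry, node, sync_id)
		if (entry->sync_id == sync_id)
			return entry;

	return NULL;
}
#endif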

static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
				max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
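
/*
 * Illustrative example (not taken from the original source, values made up):
 * with the format above, a single monitor entry in the state dump might read
 * roughly as:
 *
 *   Mon id: 203, wait for group id: 2 mask 0b11111110 to reach val: 1 and
 *   write 1 to address 0x7ffc8c84a8. Pending: 0b0. Means sync objects [16]
 *   are being monitored.
 *
 * The mask and pending fields are rendered as binary strings by
 * hl_format_as_binary(), and the sync object list assumes MONITOR_MAX_SOBS
 * is 8, matching the 8-SOB groups described in gaudi_fill_sobs_from_mon().
 */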

static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
			sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}

static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
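
/*
 * Illustrative note (not taken from the original source): the ID-to-name hash
 * tables, property array and callback table registered in
 * gaudi_state_dump_init() above are what the printers earlier in this file
 * rely on - gaudi_print_single_monitor() resolves names through
 * hl_state_dump_get_monitor_name(), and gaudi_print_fences_single_engine()
 * reads its engine geometry from the sds->props entries.
 */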

static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}

static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}