// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2022 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "gaudiP.h"
#include "../include/hw_ip/mmu/mmu_general.h"
#include "../include/hw_ip/mmu/mmu_v1_1.h"
#include "../include/gaudi/gaudi_masks.h"
#include "../include/gaudi/gaudi_fw_if.h"
#include "../include/gaudi/gaudi_reg_map.h"
#include "../include/gaudi/gaudi_async_ids_map_extended.h"

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>

/*
 * Gaudi security scheme:
 *
 * 1. Host is protected by:
 *        - Range registers
 *        - MMU
 *
 * 2. DDR is protected by:
 *        - Range registers (protect the first 512MB)
 *
 * 3. Configuration is protected by:
 *        - Range registers
 *        - Protection bits
 *
 * MMU is always enabled.
 *
 * QMAN DMA channels 0,1 (PCI DMA):
 *     - DMA is not secured.
 *     - PQ and CQ are secured.
 *     - CP is secured: The driver needs to parse CB but WREG should be allowed
 *                      because of TDMA (tensor DMA). Hence, WREG is never
 *                      secured.
 *
 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
 * channel 0 to be secured, execute the DMA and change it back to not secured.
 * Currently, the driver doesn't use the DMA while there are compute jobs
 * running.
 *
 * The current use cases for the driver to use the DMA are:
 *     - Clear SRAM on context switch (happens on context switch when device is
 *       idle)
 *     - MMU page tables area clear (happens on init)
 *
 * QMAN DMA 2-7, TPC, MME, NIC:
 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
 * CQ, CP and the engine are not secured
 *
 */

#define GAUDI_BOOT_FIT_FILE	"habanalabs/gaudi/gaudi-boot-fit.itb"
#define GAUDI_LINUX_FW_FILE	"habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE	"habanalabs/gaudi/gaudi_tpc.bin"

MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);

#define GAUDI_DMA_POOL_BLK_SIZE		0x100 /* 256 bytes */

#define GAUDI_RESET_TIMEOUT_MSEC	2000		/* 2000ms */
#define GAUDI_RESET_WAIT_MSEC		1		/* 1ms */
#define GAUDI_CPU_RESET_WAIT_MSEC	200		/* 200ms */
#define GAUDI_TEST_QUEUE_WAIT_USEC	100000		/* 100ms */

#define GAUDI_PLDM_RESET_WAIT_MSEC	1000		/* 1s */
#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC	20000		/* 20s */
#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC	1000000		/* 1s */
#define GAUDI_PLDM_MMU_TIMEOUT_USEC	(MMU_CONFIG_TIMEOUT_USEC * 100)
#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC	(HL_DEVICE_TIMEOUT_USEC * 30)
#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_MSG_TO_CPU_TIMEOUT_USEC	4000000		/* 4s */
#define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC	15000000	/* 15s */

#define GAUDI_QMAN0_FENCE_VAL	0x72E91AB9

#define GAUDI_MAX_STRING_LEN	20

#define GAUDI_CB_POOL_CB_CNT	512
#define GAUDI_CB_POOL_CB_SIZE	0x20000 /* 128KB */

#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT	3

#define GAUDI_NUM_OF_TPC_INTR_CAUSE	20

#define GAUDI_NUM_OF_QM_ERR_CAUSE	16

#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE	3

#define GAUDI_ARB_WDT_TIMEOUT	0xEE6b27FF /* 8 seconds */

#define HBM_SCRUBBING_TIMEOUT_US	1000000 /* 1s */

#define BIN_REG_STRING_SIZE	sizeof("0b10101010101010101010101010101010")

#define MONITOR_SOB_STRING_SIZE		256

static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
	GAUDI_QUEUE_ID_DMA_0_0,
	GAUDI_QUEUE_ID_DMA_0_1,
	GAUDI_QUEUE_ID_DMA_0_2,
	GAUDI_QUEUE_ID_DMA_0_3,
	GAUDI_QUEUE_ID_DMA_1_0,
	GAUDI_QUEUE_ID_DMA_1_1,
	GAUDI_QUEUE_ID_DMA_1_2,
	GAUDI_QUEUE_ID_DMA_1_3
};

static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
	[GAUDI_PCI_DMA_1] = GAUDI_ENGINE_ID_DMA_0,
	[GAUDI_PCI_DMA_2] = GAUDI_ENGINE_ID_DMA_1,
	[GAUDI_HBM_DMA_1] = GAUDI_ENGINE_ID_DMA_2,
	[GAUDI_HBM_DMA_2] = GAUDI_ENGINE_ID_DMA_3,
	[GAUDI_HBM_DMA_3] = GAUDI_ENGINE_ID_DMA_4,
	[GAUDI_HBM_DMA_4] = GAUDI_ENGINE_ID_DMA_5,
	[GAUDI_HBM_DMA_5] = GAUDI_ENGINE_ID_DMA_6,
	[GAUDI_HBM_DMA_6] = GAUDI_ENGINE_ID_DMA_7
};

static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
	[0] = GAUDI_QUEUE_ID_DMA_0_0,
	[1] = GAUDI_QUEUE_ID_DMA_0_1,
	[2] = GAUDI_QUEUE_ID_DMA_0_2,
	[3] = GAUDI_QUEUE_ID_DMA_0_3,
	[4] = GAUDI_QUEUE_ID_DMA_1_0,
	[5] = GAUDI_QUEUE_ID_DMA_1_1,
	[6] = GAUDI_QUEUE_ID_DMA_1_2,
	[7] = GAUDI_QUEUE_ID_DMA_1_3,
};

static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
	[PACKET_WREG_32]	= sizeof(struct packet_wreg32),
	[PACKET_WREG_BULK]	= sizeof(struct packet_wreg_bulk),
	[PACKET_MSG_LONG]	= sizeof(struct packet_msg_long),
	[PACKET_MSG_SHORT]	= sizeof(struct packet_msg_short),
	[PACKET_CP_DMA]		= sizeof(struct packet_cp_dma),
	[PACKET_REPEAT]		= sizeof(struct packet_repeat),
	[PACKET_MSG_PROT]	= sizeof(struct packet_msg_prot),
	[PACKET_FENCE]		= sizeof(struct packet_fence),
	[PACKET_LIN_DMA]	= sizeof(struct packet_lin_dma),
	[PACKET_NOP]		= sizeof(struct packet_nop),
	[PACKET_STOP]		= sizeof(struct packet_stop),
	[PACKET_ARB_POINT]	= sizeof(struct packet_arb_point),
	[PACKET_WAIT]		= sizeof(struct packet_wait),
	[PACKET_LOAD_AND_EXE]	= sizeof(struct packet_load_and_exe)
};

static inline bool validate_packet_id(enum packet_id id)
{
	switch (id) {
	case PACKET_WREG_32:
	case PACKET_WREG_BULK:
	case PACKET_MSG_LONG:
	case PACKET_MSG_SHORT:
	case PACKET_CP_DMA:
	case PACKET_REPEAT:
	case PACKET_MSG_PROT:
	case PACKET_FENCE:
	case PACKET_LIN_DMA:
	case PACKET_NOP:
	case PACKET_STOP:
	case PACKET_ARB_POINT:
	case PACKET_WAIT:
	case PACKET_LOAD_AND_EXE:
		return true;
	default:
		return false;
	}
}

static const char * const
	gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
	"tpc_address_exceed_slm",
	"tpc_div_by_0",
	"tpc_spu_mac_overflow",
	"tpc_spu_addsub_overflow",
	"tpc_spu_abs_overflow",
	"tpc_spu_fp_dst_nan_inf",
	"tpc_spu_fp_dst_denorm",
	"tpc_vpu_mac_overflow",
	"tpc_vpu_addsub_overflow",
	"tpc_vpu_abs_overflow",
	"tpc_vpu_fp_dst_nan_inf",
	"tpc_vpu_fp_dst_denorm",
	"tpc_assertions",
	"tpc_illegal_instruction",
	"tpc_pc_wrap_around",
	"tpc_qm_sw_err",
	"tpc_hbw_rresp_err",
	"tpc_hbw_bresp_err",
	"tpc_lbw_rresp_err",
	"tpc_lbw_bresp_err"
};
"FENCE 3 inc over max value and clipped", 221 "FENCE 0 dec under min value and clipped", 222 "FENCE 1 dec under min value and clipped", 223 "FENCE 2 dec under min value and clipped", 224 "FENCE 3 dec under min value and clipped" 225 }; 226 227 static const char * const 228 gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = { 229 "Choice push while full error", 230 "Choice Q watchdog error", 231 "MSG AXI LBW returned with error" 232 }; 233 234 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = { 235 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */ 236 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */ 237 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */ 238 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */ 239 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */ 240 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */ 241 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */ 242 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */ 243 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */ 244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */ 245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */ 246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */ 247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */ 248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */ 249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */ 250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */ 251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */ 252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */ 253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */ 254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */ 255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */ 256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_0 */ 257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_1 */ 258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_2 */ 259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_5_3 */ 260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */ 261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */ 262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */ 263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */ 264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */ 265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */ 266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */ 267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */ 268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */ 269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */ 270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */ 271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */ 272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */ 273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */ 274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */ 275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */ 276 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */ 277 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */ 278 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */ 279 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */ 280 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */ 281 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */ 282 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */ 283 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */ 284 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */ 285 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */ 286 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */ 287 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */ 288 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */ 289 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */ 290 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */ 291 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */ 292 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */ 293 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */ 294 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */ 295 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */ 296 QUEUE_TYPE_INT, /* 

static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
	{ .id = 0, .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
	{ .id = 1, .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
	{ .id = 2, .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
	{ .id = 3, .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
	{ .id = 4, .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
	{ .id = 5, .name = "SYNC_OBJ_HOST_DRAM_DONE" },
	{ .id = 6, .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
	{ .id = 7, .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
	{ .id = 8, .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
	{ .id = 9, .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
	{ .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
	{ .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
	{ .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
	{ .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
	{ .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
	{ .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
	{ .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
	{ .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
	{ .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
	{ .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
	{ .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
	{ .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
	{ .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
	{ .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
	{ .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
	{ .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
	{ .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
};

static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
	{ .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
	{ .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" },
	{ .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
	{ .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
	{ .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
	{ .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
	{ .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
	{ .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
	{ .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
	{ .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
	{ .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
};
"SYNC_OBJ_ENGINE_SEM_TPC_6" }, 368 { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" }, 369 { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" }, 370 { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" }, 371 { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" }, 372 { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" }, 373 { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" }, 374 { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" }, 375 { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" }, 376 { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" }, 377 { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" }, 378 }; 379 380 static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = { 381 { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" }, 382 { .id = 201, .name = "MON_OBJ_DMA_UP_FEEDBACK_RESET" }, 383 { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" }, 384 { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" }, 385 { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" }, 386 { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" }, 387 { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" }, 388 { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" }, 389 { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" }, 390 { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" }, 391 { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" }, 392 }; 393 394 static s64 gaudi_state_dump_specs_props[] = { 395 [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 396 [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL, 397 [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK, 398 [SP_MON_OBJ_WR_ADDR_LOW] = 399 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0, 400 [SP_MON_OBJ_WR_ADDR_HIGH] = 401 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0, 402 [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0, 403 [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0, 404 [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0, 405 [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK, 406 [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0, 407 [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR, 408 [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0, 409 [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0, 410 [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL, 411 [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0, 412 [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0, 413 [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO, 414 [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0, 415 [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES, 416 [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES, 417 [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES, 418 [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES, 419 [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES, 420 [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS, 421 [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES, 422 [SP_FENCE0_CNT_OFFSET] = 423 mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0, 424 [SP_FENCE0_RDATA_OFFSET] = 425 mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0, 426 [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0, 427 [SP_NUM_CORES] = 1, 428 }; 429 430 static const int gaudi_queue_id_to_engine_id[] = { 431 [GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3] = GAUDI_ENGINE_ID_DMA_0, 432 [GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3] = GAUDI_ENGINE_ID_DMA_1, 433 [GAUDI_QUEUE_ID_CPU_PQ] = GAUDI_ENGINE_ID_SIZE, 434 [GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3] = GAUDI_ENGINE_ID_DMA_2, 435 [GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3] = GAUDI_ENGINE_ID_DMA_3, 436 [GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3] = GAUDI_ENGINE_ID_DMA_4, 437 

/* The order here is opposite to the order of the indexing in the h/w.
 * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
 */
static const char * const gaudi_sync_manager_names[] = {
	"SYNC_MGR_E_N",
	"SYNC_MGR_W_N",
	"SYNC_MGR_E_S",
	"SYNC_MGR_W_S",
	NULL
};

struct ecc_info_extract_params {
	u64 block_address;
	u32 num_memories;
	bool derr;
};

static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
								u64 phys_addr);
static int gaudi_send_job_on_qman0(struct hl_device *hdev,
					struct hl_cs_job *job);
static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
					u32 size, u64 val);
static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
					u32 num_regs, u32 val);
static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
				u32 tpc_id);
static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
static int gaudi_cpucp_info_get(struct hl_device *hdev);
static void gaudi_disable_clock_gating(struct hl_device *hdev);
static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
				u32 size, bool eb);
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop);

static inline enum hl_collective_mode
get_collective_mode(struct hl_device *hdev, u32 queue_id)
{
	if (gaudi_queue_type[queue_id] == QUEUE_TYPE_EXT)
		return HL_COLLECTIVE_MASTER;

	if (queue_id >= GAUDI_QUEUE_ID_DMA_5_0 &&
			queue_id <= GAUDI_QUEUE_ID_DMA_5_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_TPC_7_0 &&
			queue_id <= GAUDI_QUEUE_ID_TPC_7_3)
		return HL_COLLECTIVE_SLAVE;

	if (queue_id >= GAUDI_QUEUE_ID_NIC_0_0 &&
			queue_id <= GAUDI_QUEUE_ID_NIC_9_3)
		return HL_COLLECTIVE_SLAVE;

	return HL_COLLECTIVE_NOT_SUPPORTED;
}

static inline void set_default_power_values(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;

	if (hdev->card_type == cpucp_card_type_pmc) {
		prop->max_power_default = MAX_POWER_DEFAULT_PMC;

		if (prop->fw_security_enabled)
			prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
		else
			prop->dc_power_default = DC_POWER_DEFAULT_PMC;
	} else {
		prop->max_power_default = MAX_POWER_DEFAULT_PCI;
		prop->dc_power_default = DC_POWER_DEFAULT_PCI;
	}
}

static int gaudi_set_fixed_properties(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 num_sync_stream_queues = 0;
	int i;

	prop->max_queues = GAUDI_QUEUE_ID_SIZE;
	prop->hw_queues_props = kcalloc(prop->max_queues,
			sizeof(struct hw_queue_properties),
			GFP_KERNEL);

	if (!prop->hw_queues_props)
		return -ENOMEM;

	for (i = 0 ; i < prop->max_queues ; i++) {
		if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 1;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
			num_sync_stream_queues++;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
			prop->hw_queues_props[i].driver_only = 1;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_KERNEL;
		} else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
			prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
			prop->hw_queues_props[i].driver_only = 0;
			prop->hw_queues_props[i].supports_sync_stream = 0;
			prop->hw_queues_props[i].cb_alloc_flags =
				CB_ALLOC_USER;

		}
		prop->hw_queues_props[i].collective_mode =
				get_collective_mode(hdev, i);
	}

	prop->cache_line_size = DEVICE_CACHE_LINE_SIZE;
	prop->cfg_base_address = CFG_BASE;
	prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
	prop->host_base_address = HOST_PHYS_BASE;
	prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE;
	prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
	prop->completion_mode = HL_COMPLETION_MODE_JOB;
	prop->collective_first_sob = 0;
	prop->collective_first_mon = 0;

	/* 2 SOBs per internal queue stream are reserved for collective */
	prop->sync_stream_first_sob =
			ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR)
			* QMAN_STREAMS * HL_RSVD_SOBS;
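
	/* Each reserved group is padded up to a monitor-friendly size: the
	 * expression above skips HL_RSVD_SOBS groups of
	 * ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR) SOBs for every
	 * QMAN stream. Purely as an illustration, an 11-SOB group with 8 SOBs
	 * per monitor would be padded to 16 SOBs.
	 */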

	/* 1 monitor per internal queue stream is reserved for collective
	 * 2 monitors per external queue stream are reserved for collective
	 */
	prop->sync_stream_first_mon =
			(NUMBER_OF_COLLECTIVE_QUEUES * QMAN_STREAMS) +
			(NUMBER_OF_EXT_HW_QUEUES * 2);

	prop->dram_base_address = DRAM_PHYS_BASE;
	prop->dram_size = GAUDI_HBM_SIZE_32GB;
	prop->dram_end_address = prop->dram_base_address + prop->dram_size;
	prop->dram_user_base_address = DRAM_BASE_ADDR_USER;

	prop->sram_base_address = SRAM_BASE_ADDR;
	prop->sram_size = SRAM_SIZE;
	prop->sram_end_address = prop->sram_base_address + prop->sram_size;
	prop->sram_user_base_address =
			prop->sram_base_address + SRAM_USER_BASE_OFFSET;

	prop->mmu_cache_mng_addr = MMU_CACHE_MNG_ADDR;
	prop->mmu_cache_mng_size = MMU_CACHE_MNG_SIZE;

	prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
	if (hdev->pldm)
		prop->mmu_pgt_size = 0x800000; /* 8MB */
	else
		prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
	prop->mmu_pte_size = HL_PTE_SIZE;
	prop->dram_page_size = PAGE_SIZE_2MB;
	prop->device_mem_alloc_default_page_size = prop->dram_page_size;
	prop->dram_supports_virtual_memory = false;

	prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT;
	prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT;
	prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK;
	prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK;
	prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK;
	prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK;
	prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr =
			(VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
	prop->pmmu.page_size = PAGE_SIZE_4KB;
	prop->pmmu.num_hops = MMU_ARCH_5_HOPS;
	prop->pmmu.last_mask = LAST_MASK;
	/* TODO: will be duplicated until implementing per-MMU props */
	prop->pmmu.hop_table_size = HOP_TABLE_SIZE_512_PTE;
	prop->pmmu.hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE;

	/* PMMU and HPMMU are the same except for the page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
	prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
	prop->dmmu.end_addr = VA_HOST_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->dmmu.pgt_size = prop->mmu_pgt_size;

	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GAUDI_EVENT_SIZE;
	prop->max_num_of_engines = GAUDI_ENGINE_ID_SIZE;
	prop->tpc_enabled_mask = TPC_ENABLED_MASK;

	set_default_power_values(hdev);

	prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
	prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;

	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;

	strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
					CARD_NAME_MAX_LEN);

	prop->max_pending_cs = GAUDI_MAX_PENDING_CS;

	prop->first_available_user_sob[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_sob +
			(num_sync_stream_queues * HL_RSVD_SOBS);
	prop->first_available_user_mon[HL_GAUDI_WS_DCORE] =
			prop->sync_stream_first_mon +
			(num_sync_stream_queues * HL_RSVD_MONS);

	prop->first_available_user_interrupt = USHRT_MAX;
	prop->tpc_interrupt_id = USHRT_MAX;

	/* single msi */
	prop->eq_interrupt_id = 0;

	for (i = 0 ; i < HL_MAX_DCORES ; i++)
		prop->first_available_cq[i] = USHRT_MAX;

	prop->fw_cpu_boot_dev_sts0_valid = false;
	prop->fw_cpu_boot_dev_sts1_valid = false;
	prop->hard_reset_done_by_fw = false;
	prop->gic_interrupts_enable = true;

	prop->server_type = HL_SERVER_TYPE_UNKNOWN;

	prop->clk_pll_index = HL_GAUDI_MME_PLL;
	prop->max_freq_value = GAUDI_MAX_CLK_FREQ;

	prop->use_get_power_for_reset_history = true;

	prop->configurable_stop_on_err = true;

	prop->set_max_power_on_device_init = true;

	prop->dma_mask = 48;

	prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL;

	return 0;
}

static int gaudi_pci_bars_map(struct hl_device *hdev)
{
	static const char * const name[] = {"SRAM", "CFG", "HBM"};
	bool is_wc[3] = {false, false, true};
	int rc;

	rc = hl_pci_bars_map(hdev, name, is_wc);
	if (rc)
		return rc;

	hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
			(CFG_BASE - SPI_FLASH_BASE_ADDR);

	return 0;
}

static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

	if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
		return old_addr;

	if (hdev->asic_prop.iatu_done_by_fw)
		return U64_MAX;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

	if (gaudi) {
		old_addr = gaudi->hbm_bar_cur_addr;
		gaudi->hbm_bar_cur_addr = addr;
	}

	return old_addr;
}

static int gaudi_init_iatu(struct hl_device *hdev)
{
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	if (hdev->asic_prop.iatu_done_by_fw)
		return 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;
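
	/* In BAR match mode the whole BAR aperture is translated to the device
	 * address programmed here. Region 2 below is the movable HBM window:
	 * gaudi_set_hbm_bar_base() re-programs it at runtime to expose other
	 * parts of the HBM through the same BAR.
	 */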

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		goto done;

	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
{
	return RREG32(mmHW_STATE);
}

static int gaudi_early_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pci_dev *pdev = hdev->pdev;
	resource_size_t pci_bar_size;
	u32 fw_boot_status;
	int rc;

	rc = gaudi_set_fixed_properties(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed setting fixed properties\n");
		return rc;
	}

	/* Check BAR sizes */
	pci_bar_size = pci_resource_len(pdev, SRAM_BAR_ID);

	if (pci_bar_size != SRAM_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			SRAM_BAR_ID, &pci_bar_size, SRAM_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	pci_bar_size = pci_resource_len(pdev, CFG_BAR_ID);

	if (pci_bar_size != CFG_BAR_SIZE) {
		dev_err(hdev->dev, "Not " HL_NAME "? BAR %d size %pa, expecting %llu\n",
			CFG_BAR_ID, &pci_bar_size, CFG_BAR_SIZE);
		rc = -ENODEV;
		goto free_queue_props;
	}

	prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
	hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);

	/* If FW security is enabled at this point it means no access to ELBI */
	if (hdev->asic_prop.fw_security_enabled) {
		hdev->asic_prop.iatu_done_by_fw = true;

		/*
		 * GIC-security-bit can ONLY be set by CPUCP, so at this stage
		 * the decision can only be taken based on PCI ID security.
		 */
		hdev->asic_prop.gic_interrupts_enable = false;
		goto pci_init;
	}

	rc = hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0,
				&fw_boot_status);
	if (rc)
		goto free_queue_props;

	/* Check whether FW is configuring iATU */
	if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) &&
			(fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN))
		hdev->asic_prop.iatu_done_by_fw = true;

pci_init:
	rc = hl_pci_init(hdev);
	if (rc)
		goto free_queue_props;

	/* Before continuing in the initialization, we need to read the preboot
	 * version to determine whether we run with a security-enabled firmware
	 */
	rc = hl_fw_read_preboot_status(hdev);
	if (rc) {
		if (hdev->reset_on_preboot_fail)
			/* we are already on a failure flow, so don't check if hw_fini fails. */
			hdev->asic_funcs->hw_fini(hdev, true, false);
		goto pci_fini;
	}

	if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
		dev_dbg(hdev->dev, "H/W state is dirty, must reset before initializing\n");
		rc = hdev->asic_funcs->hw_fini(hdev, true, false);
		if (rc) {
			dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc);
			goto pci_fini;
		}
	}

	return 0;

pci_fini:
	hl_pci_fini(hdev);
free_queue_props:
	kfree(hdev->asic_prop.hw_queues_props);
	return rc;
}

static int gaudi_early_fini(struct hl_device *hdev)
{
	kfree(hdev->asic_prop.hw_queues_props);
	hl_pci_fini(hdev);

	return 0;
}

/**
 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
 *
 * @hdev: pointer to hl_device structure
 *
 */
static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
{
	u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq;
	int rc;

	if ((hdev->fw_components & FW_TYPE_LINUX) &&
			(prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) {
		struct gaudi_device *gaudi = hdev->asic_specific;

		if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
			return 0;

		rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr);

		if (rc)
			return rc;

		freq = pll_freq_arr[2];
	} else {
		/* Backward compatibility */
		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
		nr = RREG32(mmPSOC_CPU_PLL_NR);
		nf = RREG32(mmPSOC_CPU_PLL_NF);
		od = RREG32(mmPSOC_CPU_PLL_OD);
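
		/* The frequency is derived from the PLL registers read above:
		 * pll_clk = PLL_REF_CLK * (nf + 1) / ((nr + 1) * (od + 1)),
		 * optionally divided by (div_fctr + 1). Purely as an
		 * illustration, a 50 MHz reference with nf = 79, nr = 1 and
		 * od = 1 gives 50 * 80 / (2 * 2) = 1000 MHz.
		 */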

		if (div_sel == DIV_SEL_REF_CLK ||
				div_sel == DIV_SEL_DIVIDED_REF) {
			if (div_sel == DIV_SEL_REF_CLK)
				freq = PLL_REF_CLK;
			else
				freq = PLL_REF_CLK / (div_fctr + 1);
		} else if (div_sel == DIV_SEL_PLL_CLK ||
				div_sel == DIV_SEL_DIVIDED_PLL) {
			pll_clk = PLL_REF_CLK * (nf + 1) /
					((nr + 1) * (od + 1));
			if (div_sel == DIV_SEL_PLL_CLK)
				freq = pll_clk;
			else
				freq = pll_clk / (div_fctr + 1);
		} else {
			dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel);
			freq = 0;
		}
	}

	prop->psoc_timestamp_frequency = freq;
	prop->psoc_pci_pll_nr = nr;
	prop->psoc_pci_pll_nf = nf;
	prop->psoc_pci_pll_od = od;
	prop->psoc_pci_pll_div_factor = div_fctr;

	return 0;
}

static int _gaudi_init_tpc_mem(struct hl_device *hdev,
		dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct packet_lin_dma *init_tpc_mem_pkt;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u64 dst_addr;
	u32 cb_size, ctl;
	u8 tpc_id;
	int rc;

	cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false);
	if (!cb)
		return -EFAULT;

	init_tpc_mem_pkt = cb->kernel_address;
	cb_size = sizeof(*init_tpc_mem_pkt);
	memset(init_tpc_mem_pkt, 0, cb_size);

	init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);

	ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
	ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
	ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);

	init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);

	init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);

	/* TPC_CMD is configured with I$ prefetch enabled, so address should be
	 * aligned to 8KB
	 */
	dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK,
				round_up(prop->sram_user_base_address, SZ_8K));
	init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
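
	/* The copy below is submitted through gaudi_send_job_on_qman0(), i.e.
	 * on the PCI DMA channel 0 queue. Per the security scheme described at
	 * the top of this file, the driver only uses this DMA while the device
	 * is idle, temporarily securing the channel for the transfer.
	 */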

	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
	if (!job) {
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto release_cb;
	}

	job->id = 0;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);

	hl_debugfs_add_job(hdev, job);

	rc = gaudi_send_job_on_qman0(hdev, job);

	if (rc)
		goto free_job;

	for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
		rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
		if (rc)
			break;
	}

free_job:
	hl_userptr_delete_list(hdev, &job->userptr_list);
	hl_debugfs_remove_job(hdev, job);
	kfree(job);
	atomic_dec(&cb->cs_cnt);

release_cb:
	hl_cb_put(cb);
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	return rc;
}

/*
 * gaudi_init_tpc_mem() - Initialize TPC memories.
 * @hdev: Pointer to hl_device structure.
 *
 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
 *
 * Return: 0 for success, negative value for error.
 */
static int gaudi_init_tpc_mem(struct hl_device *hdev)
{
	const struct firmware *fw;
	size_t fw_size;
	void *cpu_addr;
	dma_addr_t dma_handle;
	int rc, count = 5;

again:
	rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
	if (rc == -EINTR && count-- > 0) {
		msleep(50);
		goto again;
	}

	if (rc) {
		dev_err(hdev->dev, "Failed to load firmware file %s\n",
				GAUDI_TPC_FW_FILE);
		goto out;
	}

	fw_size = fw->size;
	cpu_addr = hl_asic_dma_alloc_coherent(hdev, fw_size, &dma_handle, GFP_KERNEL | __GFP_ZERO);
	if (!cpu_addr) {
		dev_err(hdev->dev,
			"Failed to allocate %zu of dma memory for TPC kernel\n",
			fw_size);
		rc = -ENOMEM;
		goto out;
	}

	memcpy(cpu_addr, fw->data, fw_size);

	rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);

	hl_asic_dma_free_coherent(hdev, fw->size, cpu_addr, dma_handle);

out:
	release_firmware(fw);
	return rc;
}

static void gaudi_collective_map_sobs(struct hl_device *hdev, u32 stream)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *prop = &gaudi->collective_props;
	struct hl_hw_queue *q;
	u32 i, sob_id, sob_group_id, queue_id;

	/* Iterate through SOB groups and assign a SOB for each slave queue */
	sob_group_id =
		stream * HL_RSVD_SOBS + prop->curr_sob_group_idx[stream];
	sob_id = prop->hw_sob_group[sob_group_id].base_sob_id;
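
	/* Each engine exposes four streams (queue IDs *_0..*_3), so stepping
	 * the queue index by 4 below moves to the same stream of the next NIC
	 * engine. Every slave queue gets its own SOB out of the group.
	 */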

	queue_id = GAUDI_QUEUE_ID_NIC_0_0 + stream;
	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) {
		q = &hdev->kernel_queues[queue_id + (4 * i)];
		q->sync_stream_prop.collective_sob_id = sob_id + i;
	}

	/* Both DMA5 and TPC7 use the same resources since only a single
	 * engine needs to participate in the reduction process
	 */
	queue_id = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;

	queue_id = GAUDI_QUEUE_ID_TPC_7_0 + stream;
	q = &hdev->kernel_queues[queue_id];
	q->sync_stream_prop.collective_sob_id =
			sob_id + NIC_NUMBER_OF_ENGINES;
}

static void gaudi_sob_group_hw_reset(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;
	int i;

	for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
		WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			(hw_sob_group->base_sob_id * 4) + (i * 4)), 0);

	kref_init(&hw_sob_group->kref);
}

static void gaudi_sob_group_reset_error(struct kref *ref)
{
	struct gaudi_hw_sob_group *hw_sob_group =
		container_of(ref, struct gaudi_hw_sob_group, kref);
	struct hl_device *hdev = hw_sob_group->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, base_sob_id: %d\n",
		hw_sob_group->base_sob_id);
}

static void gaudi_collective_mstr_sob_mask_set(struct gaudi_device *gaudi)
{
	struct gaudi_collective_properties *prop;
	int i;

	prop = &gaudi->collective_props;

	memset(prop->mstr_sob_mask, 0, sizeof(prop->mstr_sob_mask));

	for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++)
		if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + i))
			prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
					BIT(i % HL_MAX_SOBS_PER_MONITOR);

	/* Set collective engine bit */
	prop->mstr_sob_mask[i / HL_MAX_SOBS_PER_MONITOR] |=
				BIT(i % HL_MAX_SOBS_PER_MONITOR);
}

static int gaudi_collective_init(struct hl_device *hdev)
{
	u32 i, sob_id, reserved_sobs_per_group;
	struct gaudi_collective_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	prop = &gaudi->collective_props;
	sob_id = hdev->asic_prop.collective_first_sob;

	/* First sob in group must be aligned to HL_MAX_SOBS_PER_MONITOR */
	reserved_sobs_per_group =
		ALIGN(NUMBER_OF_SOBS_IN_GRP, HL_MAX_SOBS_PER_MONITOR);

	/* Init SOB groups */
	for (i = 0 ; i < NUM_SOB_GROUPS; i++) {
		prop->hw_sob_group[i].hdev = hdev;
		prop->hw_sob_group[i].base_sob_id = sob_id;
		sob_id += reserved_sobs_per_group;
		gaudi_sob_group_hw_reset(&prop->hw_sob_group[i].kref);
	}

	for (i = 0 ; i < QMAN_STREAMS; i++) {
		prop->next_sob_group_val[i] = 1;
		prop->curr_sob_group_idx[i] = 0;
		gaudi_collective_map_sobs(hdev, i);
	}

	gaudi_collective_mstr_sob_mask_set(gaudi);

	return 0;
}

static void gaudi_reset_sob_group(struct hl_device *hdev, u16 sob_group)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct gaudi_collective_properties *cprop = &gaudi->collective_props;

	kref_put(&cprop->hw_sob_group[sob_group].kref,
					gaudi_sob_group_hw_reset);
}

static void gaudi_collective_master_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, u32 stream, u32 sob_group_offset)
{
	u32 master_sob_base, master_monitor, queue_id, cb_size = 0;
	struct gaudi_collective_properties *cprop;
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	struct gaudi_device *gaudi;

	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;
	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	master_sob_base =
		cprop->hw_sob_group[sob_group_offset].base_sob_id;
	master_monitor = prop->collective_mstr_mon_id[0];

	cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[0],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.data = (void *) job->patched_cb;
	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[0];
	wait_prop.sob_val = cprop->next_sob_group_val[stream];
	wait_prop.mon_id = master_monitor;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
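
	/* A single monitor can observe at most HL_MAX_SOBS_PER_MONITOR sync
	 * objects, while the SOB group covers all NIC engines plus the
	 * reduction engine, so a second monitor is armed below for the
	 * remaining SOBs of the group.
	 */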

	master_sob_base += HL_MAX_SOBS_PER_MONITOR;
	master_monitor = prop->collective_mstr_mon_id[1];

	dev_dbg(hdev->dev,
		"Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
		master_sob_base, cprop->mstr_sob_mask[1],
		cprop->next_sob_group_val[stream],
		master_monitor, queue_id);

	wait_prop.sob_base = master_sob_base;
	wait_prop.sob_mask = cprop->mstr_sob_mask[1];
	wait_prop.mon_id = master_monitor;
	wait_prop.size = cb_size;
	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);
}

static void gaudi_collective_slave_init_job(struct hl_device *hdev,
		struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
{
	struct hl_gen_wait_properties wait_prop;
	struct hl_sync_stream_properties *prop;
	u32 queue_id, cb_size = 0;

	queue_id = job->hw_queue_id;
	prop = &hdev->kernel_queues[queue_id].sync_stream_prop;

	if (job->cs->encaps_signals) {
		/* use the encaps signal handle stored earlier in the flow
		 * and set the SOB information from the encaps
		 * signals handle
		 */
		hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
						cs_cmpl);

		dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u, wait for sob_val: %u\n",
				job->cs->sequence,
				cs_cmpl->hw_sob->sob_id,
				cs_cmpl->sob_val);
	}

	/* Add to wait CBs using slave monitor */
	wait_prop.data = (void *) job->user_cb;
	wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
	wait_prop.sob_mask = 0x1;
	wait_prop.sob_val = cs_cmpl->sob_val;
	wait_prop.mon_id = prop->collective_slave_mon_id;
	wait_prop.q_idx = queue_id;
	wait_prop.size = cb_size;

	dev_dbg(hdev->dev,
		"Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
		cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
		prop->collective_slave_mon_id, queue_id);

	cb_size += gaudi_gen_wait_cb(hdev, &wait_prop);

	dev_dbg(hdev->dev,
		"generate signal CB, sob_id: %d, sob val: 1, q_idx: %d\n",
		prop->collective_sob_id, queue_id);

	cb_size += gaudi_gen_signal_cb(hdev, job->user_cb,
			prop->collective_sob_id, cb_size, false);
}

static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
{
	struct hl_cs_compl *signal_cs_cmpl =
		container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
	struct hl_cs_compl *cs_cmpl =
		container_of(cs->fence, struct hl_cs_compl, base_fence);
	struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
	struct gaudi_collective_properties *cprop;
	u32 stream, queue_id, sob_group_offset;
	struct gaudi_device *gaudi;
	struct hl_device *hdev;
	struct hl_cs_job *job;
	struct hl_ctx *ctx;

	ctx = cs->ctx;
	hdev = ctx->hdev;
	gaudi = hdev->asic_specific;
	cprop = &gaudi->collective_props;

	if (cs->encaps_signals) {
		cs_cmpl->hw_sob = handle->hw_sob;
		/* at this checkpoint we only need the hw_sob pointer
		 * for the completion check before starting to go over the
		 * jobs of the master/slaves. The sob_value will be taken
		 * later on in gaudi_collective_slave_init_job, depending on
		 * each job's wait offset value.
		 */
		cs_cmpl->sob_val = 0;
	} else {
		/* copy the SOB id and value of the signal CS */
		cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
		cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
	}

	/* check again if the signal cs already completed.
	 * if yes then don't send any wait cs since the hw_sob
	 * could be in reset already. if signal is not completed
	 * then get refcount to hw_sob to prevent resetting the sob
	 * while wait cs is not submitted.
	 * note that this check is protected by two locks,
	 * hw queue lock and completion object lock,
	 * and the same completion object lock also protects
	 * the hw_sob reset handler function.
	 * The hw_queue lock prevents the hw_sob refcount value, which is
	 * changed by the signal/wait flows, from going out of sync.
	 */
	spin_lock(&signal_cs_cmpl->lock);

	if (completion_done(&cs->signal_fence->completion)) {
		spin_unlock(&signal_cs_cmpl->lock);
		return -EINVAL;
	}
	/* Increment kref since all slave queues are now waiting on it */
	kref_get(&cs_cmpl->hw_sob->kref);

	spin_unlock(&signal_cs_cmpl->lock);

	/* Calculate the stream from collective master queue (1st job) */
	job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
	stream = job->hw_queue_id % 4;
	sob_group_offset =
		stream * HL_RSVD_SOBS + cprop->curr_sob_group_idx[stream];

	list_for_each_entry(job, &cs->job_list, cs_node) {
		queue_id = job->hw_queue_id;

		if (hdev->kernel_queues[queue_id].collective_mode ==
				HL_COLLECTIVE_MASTER)
			gaudi_collective_master_init_job(hdev, job, stream,
						sob_group_offset);
		else
			gaudi_collective_slave_init_job(hdev, job, cs_cmpl);
	}

	cs_cmpl->sob_group = sob_group_offset;

	/* Handle sob group kref and wraparound */
	kref_get(&cprop->hw_sob_group[sob_group_offset].kref);
	cprop->next_sob_group_val[stream]++;

	if (cprop->next_sob_group_val[stream] == HL_MAX_SOB_VAL) {
		/*
		 * Decrement as we reached the max value.
		 * The release function won't be called here as we've
		 * just incremented the refcount.
		 */
		kref_put(&cprop->hw_sob_group[sob_group_offset].kref,
				gaudi_sob_group_reset_error);
		cprop->next_sob_group_val[stream] = 1;
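
		/* The group index below wraps around using a bitwise AND,
		 * which assumes HL_RSVD_SOBS (the number of SOB groups
		 * reserved per stream) is a power of two.
		 */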
		/* only two SOBs are currently in use */
		cprop->curr_sob_group_idx[stream] =
			(cprop->curr_sob_group_idx[stream] + 1) &
							(HL_RSVD_SOBS - 1);

		gaudi_collective_map_sobs(hdev, stream);

		dev_dbg(hdev->dev, "switched to SOB group %d, stream: %d\n",
				cprop->curr_sob_group_idx[stream], stream);
	}

	mb();
	hl_fence_put(cs->signal_fence);
	cs->signal_fence = NULL;

	return 0;
}
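
/* Illustration only for the helper below: assuming a 128-byte device cache
 * line and 16-byte MSG_PROT packets, a 120-byte user CB gets 8 bytes of
 * padding up to the cache-line boundary plus the two completion packets,
 * i.e. 40 extra bytes, so that the MSG_PROT packets do not straddle a cache
 * line.
 */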

static u32 gaudi_get_patched_cb_extra_size(u32 user_cb_size)
{
	u32 cacheline_end, additional_commands;

	cacheline_end = round_up(user_cb_size, DEVICE_CACHE_LINE_SIZE);
	additional_commands = sizeof(struct packet_msg_prot) * 2;

	if (user_cb_size + additional_commands > cacheline_end)
		return cacheline_end - user_cb_size + additional_commands;
	else
		return additional_commands;
}

static int gaudi_collective_wait_create_job(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
		u32 encaps_signal_offset)
{
	struct hw_queue_properties *hw_queue_prop;
	struct hl_cs_counters_atomic *cntr;
	struct hl_cs_job *job;
	struct hl_cb *cb;
	u32 cb_size;
	bool patched_cb;

	cntr = &hdev->aggregated_cs_counters;

	if (mode == HL_COLLECTIVE_MASTER) {
		/* CB size of collective master queue contains
		 * 4 msg short packets for monitor 1 configuration
		 * 1 fence packet
		 * 4 msg short packets for monitor 2 configuration
		 * 1 fence packet
		 * 2 msg prot packets for completion and MSI
		 */
		cb_size = sizeof(struct packet_msg_short) * 8 +
				sizeof(struct packet_fence) * 2 +
				sizeof(struct packet_msg_prot) * 2;
		patched_cb = true;
	} else {
		/* CB size of collective slave queues contains
		 * 4 msg short packets for monitor configuration
		 * 1 fence packet
		 * 1 additional msg short packet for sob signal
		 */
		cb_size = sizeof(struct packet_msg_short) * 5 +
				sizeof(struct packet_fence);
		patched_cb = false;
	}

	hw_queue_prop = &hdev->asic_prop.hw_queues_props[queue_id];
	job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
	if (!job) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		return -ENOMEM;
	}

	/* Allocate internal mapped CB for non patched CBs */
	cb = hl_cb_kernel_create(hdev, cb_size, !patched_cb);
	if (!cb) {
		atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
		atomic64_inc(&cntr->out_of_mem_drop_cnt);
		kfree(job);
		return -EFAULT;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	atomic_inc(&job->user_cb->cs_cnt);
	job->user_cb_size = cb_size;
	job->hw_queue_id = queue_id;

	/* since it's guaranteed to have only one chunk in the collective wait
	 * cs, we can use this chunk to set the encapsulated signal offset
	 * in the jobs.
	 */
	if (cs->encaps_signals)
		job->encaps_sig_wait_offset = encaps_signal_offset;

	/*
	 * No need for parsing, the user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons - we don't need
	 * the CB in the CB idr anymore and to decrement its refcount as
	 * it was incremented inside hl_cb_kernel_create().
	 */
	if (patched_cb)
		job->patched_cb = job->user_cb;
	else
		job->patched_cb = NULL;

	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle);

	/* increment refcount as for external queues we get completion */
	if (hw_queue_prop->type == QUEUE_TYPE_EXT)
		cs_get(cs);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	hl_debugfs_add_job(hdev, job);

	return 0;
}

static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
		struct hl_ctx *ctx, struct hl_cs *cs,
		u32 wait_queue_id, u32 collective_engine_id,
		u32 encaps_signal_offset)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hw_queue_properties *hw_queue_prop;
	u32 queue_id, collective_queue, num_jobs;
	u32 stream, nic_queue, nic_idx = 0;
	bool skip;
	int i, rc = 0;

	/* Verify wait queue id is configured as master */
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
	if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
		dev_err(hdev->dev,
			"Queue %d is not configured as collective master\n",
			wait_queue_id);
		return -EINVAL;
	}

	/* Verify engine id is supported */
	if (collective_engine_id != GAUDI_ENGINE_ID_DMA_5 &&
			collective_engine_id != GAUDI_ENGINE_ID_TPC_7) {
		dev_err(hdev->dev,
			"Collective wait does not support engine %u\n",
			collective_engine_id);
		return -EINVAL;
	}
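
	/* Every QMAN exposes four streams (queue IDs *_0..*_3), so the stream
	 * index is simply the wait queue ID modulo 4.
	 */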
	stream = wait_queue_id % 4;

	if (collective_engine_id == GAUDI_ENGINE_ID_DMA_5)
		collective_queue = GAUDI_QUEUE_ID_DMA_5_0 + stream;
	else
		collective_queue = GAUDI_QUEUE_ID_TPC_7_0 + stream;

	num_jobs = NUMBER_OF_SOBS_IN_GRP + 1;
	nic_queue = GAUDI_QUEUE_ID_NIC_0_0 + stream;

	/* First job goes to the collective master queue, it will wait for
	 * the collective slave queues to finish execution.
	 * The synchronization is done using two monitors:
	 * First monitor for NICs 0-7, second monitor for NICs 8-9 and the
	 * reduction engine (DMA5/TPC7).
	 *
	 * The rest of the jobs go to the collective slave queues which will
	 * all wait for the user to signal sob 'cs_cmpl->sob_val'.
	 */
	for (i = 0 ; i < num_jobs ; i++) {
		if (i == 0) {
			queue_id = wait_queue_id;
			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_MASTER, queue_id,
				wait_queue_id, encaps_signal_offset);
		} else {
			if (nic_idx < NIC_NUMBER_OF_ENGINES) {
				if (gaudi->hw_cap_initialized &
						BIT(HW_CAP_NIC_SHIFT + nic_idx))
					skip = false;
				else
					skip = true;

				queue_id = nic_queue;
				nic_queue += 4;
				nic_idx++;

				if (skip)
					continue;
			} else {
				queue_id = collective_queue;
			}

			rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
				HL_COLLECTIVE_SLAVE, queue_id,
				wait_queue_id, encaps_signal_offset);
		}

		if (rc)
			return rc;
	}

	return rc;
}

static int gaudi_late_init(struct hl_device *hdev)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	int rc;

	rc = gaudi->cpucp_info_get(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to get cpucp info\n");
		return rc;
	}

	if ((hdev->card_type == cpucp_card_type_pci) &&
			(hdev->nic_ports_mask & 0x3)) {
		dev_info(hdev->dev,
			"PCI card detected, only 8 ports are enabled\n");
		hdev->nic_ports_mask &= ~0x3;

		/* Stop and disable unused NIC QMANs */
		WREG32(mmNIC0_QM0_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM1_GLBL_CFG1, NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK |
					NIC0_QM0_GLBL_CFG1_CP_STOP_MASK);

		WREG32(mmNIC0_QM0_GLBL_CFG0, 0);
		WREG32(mmNIC0_QM1_GLBL_CFG0, 0);

		gaudi->hw_cap_initialized &= ~(HW_CAP_NIC0 | HW_CAP_NIC1);
	}

	rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 0x0);
	if (rc)
		return rc;

	/* Scrub both SRAM and DRAM */
	rc = hdev->asic_funcs->scrub_device_mem(hdev);
	if (rc)
		goto disable_pci_access;

	rc = gaudi_fetch_psoc_frequency(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
		goto disable_pci_access;
	}

	rc = gaudi_mmu_clear_pgt_range(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
		goto disable_pci_access;
	}

	rc = gaudi_init_tpc_mem(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize TPC memories\n");
		goto disable_pci_access;
	}

	rc = gaudi_collective_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to init collective\n");
		goto disable_pci_access;
	}

	/* We only support a single ASID for the user, so for the sake of optimization, just
	 * initialize the ASID one time during device initialization with the fixed value of 1
	 */
	gaudi_mmu_prepare(hdev, 1);

	hl_fw_set_pll_profile(hdev);

	return 0;

disable_pci_access:
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0);

	return rc;
}

static void gaudi_late_fini(struct hl_device *hdev)
{
	hl_hwmon_release_resources(hdev);
}

static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
{
	dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
	void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
	int i, j, rc = 0;

	/*
	 * The device CPU works with 40-bit addresses, while bit 39 must be set
	 * to '1' when accessing the host.
	 * Bits 49:39 of the full host address are saved for a later
	 * configuration of the HW to perform extension to 50 bits.
	 * Because there is a single HW register that holds the extension bits,
	 * these bits must be identical across the entire allocated range.
	 */
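
	/* The extension bits (49:39) of a small allocation change only if it
	 * happens to straddle a 1ULL << 39 (512 GB) boundary, so a few retries
	 * are enough to obtain a suitable range.
	 */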
to '1' when accessing the host. 1703 * Bits 49:39 of the full host address are saved for a later 1704 * configuration of the HW to perform extension to 50 bits. 1705 * Because there is a single HW register that holds the extension bits, 1706 * these bits must be identical in all allocated range. 1707 */ 1708 1709 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) { 1710 virt_addr_arr[i] = hl_asic_dma_alloc_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, 1711 &dma_addr_arr[i], 1712 GFP_KERNEL | __GFP_ZERO); 1713 if (!virt_addr_arr[i]) { 1714 rc = -ENOMEM; 1715 goto free_dma_mem_arr; 1716 } 1717 1718 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1; 1719 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) == 1720 GAUDI_CPU_PCI_MSB_ADDR(end_addr)) 1721 break; 1722 } 1723 1724 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) { 1725 dev_err(hdev->dev, 1726 "MSB of CPU accessible DMA memory are not identical in all range\n"); 1727 rc = -EFAULT; 1728 goto free_dma_mem_arr; 1729 } 1730 1731 hdev->cpu_accessible_dma_mem = virt_addr_arr[i]; 1732 hdev->cpu_accessible_dma_address = dma_addr_arr[i]; 1733 hdev->cpu_pci_msb_addr = 1734 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address); 1735 1736 if (!hdev->asic_prop.fw_security_enabled) 1737 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address); 1738 1739 free_dma_mem_arr: 1740 for (j = 0 ; j < i ; j++) 1741 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, virt_addr_arr[j], 1742 dma_addr_arr[j]); 1743 1744 return rc; 1745 } 1746 1747 static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev) 1748 { 1749 struct gaudi_device *gaudi = hdev->asic_specific; 1750 struct gaudi_internal_qman_info *q; 1751 u32 i; 1752 1753 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1754 q = &gaudi->internal_qmans[i]; 1755 if (!q->pq_kernel_addr) 1756 continue; 1757 hl_asic_dma_free_coherent(hdev, q->pq_size, q->pq_kernel_addr, q->pq_dma_addr); 1758 } 1759 } 1760 1761 static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev) 1762 { 1763 struct gaudi_device *gaudi = hdev->asic_specific; 1764 struct gaudi_internal_qman_info *q; 1765 int rc, i; 1766 1767 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) { 1768 if (gaudi_queue_type[i] != QUEUE_TYPE_INT) 1769 continue; 1770 1771 q = &gaudi->internal_qmans[i]; 1772 1773 switch (i) { 1774 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_7_3: 1775 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES; 1776 break; 1777 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3: 1778 q->pq_size = MME_QMAN_SIZE_IN_BYTES; 1779 break; 1780 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3: 1781 q->pq_size = TPC_QMAN_SIZE_IN_BYTES; 1782 break; 1783 case GAUDI_QUEUE_ID_NIC_0_0 ... 
GAUDI_QUEUE_ID_NIC_9_3: 1784 q->pq_size = NIC_QMAN_SIZE_IN_BYTES; 1785 break; 1786 default: 1787 dev_err(hdev->dev, "Bad internal queue index %d", i); 1788 rc = -EINVAL; 1789 goto free_internal_qmans_pq_mem; 1790 } 1791 1792 q->pq_kernel_addr = hl_asic_dma_alloc_coherent(hdev, q->pq_size, &q->pq_dma_addr, 1793 GFP_KERNEL | __GFP_ZERO); 1794 if (!q->pq_kernel_addr) { 1795 rc = -ENOMEM; 1796 goto free_internal_qmans_pq_mem; 1797 } 1798 } 1799 1800 return 0; 1801 1802 free_internal_qmans_pq_mem: 1803 gaudi_free_internal_qmans_pq_mem(hdev); 1804 return rc; 1805 } 1806 1807 static void gaudi_set_pci_memory_regions(struct hl_device *hdev) 1808 { 1809 struct asic_fixed_properties *prop = &hdev->asic_prop; 1810 struct pci_mem_region *region; 1811 1812 /* CFG */ 1813 region = &hdev->pci_mem_region[PCI_REGION_CFG]; 1814 region->region_base = CFG_BASE; 1815 region->region_size = CFG_SIZE; 1816 region->offset_in_bar = CFG_BASE - SPI_FLASH_BASE_ADDR; 1817 region->bar_size = CFG_BAR_SIZE; 1818 region->bar_id = CFG_BAR_ID; 1819 region->used = 1; 1820 1821 /* SRAM */ 1822 region = &hdev->pci_mem_region[PCI_REGION_SRAM]; 1823 region->region_base = SRAM_BASE_ADDR; 1824 region->region_size = SRAM_SIZE; 1825 region->offset_in_bar = 0; 1826 region->bar_size = SRAM_BAR_SIZE; 1827 region->bar_id = SRAM_BAR_ID; 1828 region->used = 1; 1829 1830 /* DRAM */ 1831 region = &hdev->pci_mem_region[PCI_REGION_DRAM]; 1832 region->region_base = DRAM_PHYS_BASE; 1833 region->region_size = hdev->asic_prop.dram_size; 1834 region->offset_in_bar = 0; 1835 region->bar_size = prop->dram_pci_bar_size; 1836 region->bar_id = HBM_BAR_ID; 1837 region->used = 1; 1838 1839 /* SP SRAM */ 1840 region = &hdev->pci_mem_region[PCI_REGION_SP_SRAM]; 1841 region->region_base = PSOC_SCRATCHPAD_ADDR; 1842 region->region_size = PSOC_SCRATCHPAD_SIZE; 1843 region->offset_in_bar = PSOC_SCRATCHPAD_ADDR - SPI_FLASH_BASE_ADDR; 1844 region->bar_size = CFG_BAR_SIZE; 1845 region->bar_id = CFG_BAR_ID; 1846 region->used = 1; 1847 } 1848 1849 static int gaudi_sw_init(struct hl_device *hdev) 1850 { 1851 struct gaudi_device *gaudi; 1852 u32 i, event_id = 0; 1853 int rc; 1854 1855 /* Allocate device structure */ 1856 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL); 1857 if (!gaudi) 1858 return -ENOMEM; 1859 1860 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) { 1861 if (gaudi_irq_map_table[i].valid) { 1862 if (event_id == GAUDI_EVENT_SIZE) { 1863 dev_err(hdev->dev, 1864 "Event array exceeds the limit of %u events\n", 1865 GAUDI_EVENT_SIZE); 1866 rc = -EINVAL; 1867 goto free_gaudi_device; 1868 } 1869 1870 gaudi->events[event_id++] = 1871 gaudi_irq_map_table[i].fc_id; 1872 } 1873 } 1874 1875 gaudi->cpucp_info_get = gaudi_cpucp_info_get; 1876 1877 hdev->asic_specific = gaudi; 1878 1879 /* Create DMA pool for small allocations */ 1880 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), 1881 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); 1882 if (!hdev->dma_pool) { 1883 dev_err(hdev->dev, "failed to create DMA pool\n"); 1884 rc = -ENOMEM; 1885 goto free_gaudi_device; 1886 } 1887 1888 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev); 1889 if (rc) 1890 goto free_dma_pool; 1891 1892 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1); 1893 if (!hdev->cpu_accessible_dma_pool) { 1894 dev_err(hdev->dev, 1895 "Failed to create CPU accessible DMA pool\n"); 1896 rc = -ENOMEM; 1897 goto free_cpu_dma_mem; 1898 } 1899 1900 rc = gen_pool_add(hdev->cpu_accessible_dma_pool, 1901 (uintptr_t) hdev->cpu_accessible_dma_mem, 1902 HL_CPU_ACCESSIBLE_MEM_SIZE, -1); 1903 if 
(rc) { 1904 dev_err(hdev->dev, 1905 "Failed to add memory to CPU accessible DMA pool\n"); 1906 rc = -EFAULT; 1907 goto free_cpu_accessible_dma_pool; 1908 } 1909 1910 rc = gaudi_alloc_internal_qmans_pq_mem(hdev); 1911 if (rc) 1912 goto free_cpu_accessible_dma_pool; 1913 1914 spin_lock_init(&gaudi->hw_queues_lock); 1915 1916 hdev->supports_sync_stream = true; 1917 hdev->supports_coresight = true; 1918 hdev->supports_staged_submission = true; 1919 hdev->supports_wait_for_multi_cs = true; 1920 1921 hdev->asic_funcs->set_pci_memory_regions(hdev); 1922 hdev->stream_master_qid_arr = 1923 hdev->asic_funcs->get_stream_master_qid_arr(); 1924 hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE; 1925 1926 return 0; 1927 1928 free_cpu_accessible_dma_pool: 1929 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1930 free_cpu_dma_mem: 1931 if (!hdev->asic_prop.fw_security_enabled) 1932 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1933 hdev->cpu_pci_msb_addr); 1934 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1935 hdev->cpu_accessible_dma_address); 1936 free_dma_pool: 1937 dma_pool_destroy(hdev->dma_pool); 1938 free_gaudi_device: 1939 kfree(gaudi); 1940 return rc; 1941 } 1942 1943 static int gaudi_sw_fini(struct hl_device *hdev) 1944 { 1945 struct gaudi_device *gaudi = hdev->asic_specific; 1946 1947 gaudi_free_internal_qmans_pq_mem(hdev); 1948 1949 gen_pool_destroy(hdev->cpu_accessible_dma_pool); 1950 1951 if (!hdev->asic_prop.fw_security_enabled) 1952 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address, 1953 hdev->cpu_pci_msb_addr); 1954 1955 hl_asic_dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE, hdev->cpu_accessible_dma_mem, 1956 hdev->cpu_accessible_dma_address); 1957 1958 dma_pool_destroy(hdev->dma_pool); 1959 1960 kfree(gaudi); 1961 1962 return 0; 1963 } 1964 1965 static irqreturn_t gaudi_irq_handler_single(int irq, void *arg) 1966 { 1967 struct hl_device *hdev = arg; 1968 int i; 1969 1970 if (hdev->disabled) 1971 return IRQ_HANDLED; 1972 1973 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1974 hl_irq_handler_cq(irq, &hdev->completion_queue[i]); 1975 1976 hl_irq_handler_eq(irq, &hdev->event_queue); 1977 1978 return IRQ_HANDLED; 1979 } 1980 1981 /* 1982 * For backward compatibility, new MSI interrupts should be set after the 1983 * existing CPU and NIC interrupts. 1984 */ 1985 static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr, 1986 bool cpu_eq) 1987 { 1988 int msi_vec; 1989 1990 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq)) 1991 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n", 1992 GAUDI_EVENT_QUEUE_MSI_IDX); 1993 1994 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? 
nr : 1995 (nr + NIC_NUMBER_OF_ENGINES + 1); 1996 1997 return pci_irq_vector(hdev->pdev, msi_vec); 1998 } 1999 2000 static int gaudi_enable_msi_single(struct hl_device *hdev) 2001 { 2002 int rc, irq; 2003 2004 dev_dbg(hdev->dev, "Working in single MSI IRQ mode\n"); 2005 2006 irq = gaudi_pci_irq_vector(hdev, 0, false); 2007 rc = request_irq(irq, gaudi_irq_handler_single, 0, 2008 "gaudi single msi", hdev); 2009 if (rc) 2010 dev_err(hdev->dev, 2011 "Failed to request single MSI IRQ\n"); 2012 2013 return rc; 2014 } 2015 2016 static int gaudi_enable_msi(struct hl_device *hdev) 2017 { 2018 struct gaudi_device *gaudi = hdev->asic_specific; 2019 int rc; 2020 2021 if (gaudi->hw_cap_initialized & HW_CAP_MSI) 2022 return 0; 2023 2024 rc = pci_alloc_irq_vectors(hdev->pdev, 1, 1, PCI_IRQ_MSI); 2025 if (rc < 0) { 2026 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc); 2027 return rc; 2028 } 2029 2030 rc = gaudi_enable_msi_single(hdev); 2031 if (rc) 2032 goto free_pci_irq_vectors; 2033 2034 gaudi->hw_cap_initialized |= HW_CAP_MSI; 2035 2036 return 0; 2037 2038 free_pci_irq_vectors: 2039 pci_free_irq_vectors(hdev->pdev); 2040 return rc; 2041 } 2042 2043 static void gaudi_sync_irqs(struct hl_device *hdev) 2044 { 2045 struct gaudi_device *gaudi = hdev->asic_specific; 2046 2047 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2048 return; 2049 2050 /* Wait for all pending IRQs to be finished */ 2051 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false)); 2052 } 2053 2054 static void gaudi_disable_msi(struct hl_device *hdev) 2055 { 2056 struct gaudi_device *gaudi = hdev->asic_specific; 2057 2058 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI)) 2059 return; 2060 2061 gaudi_sync_irqs(hdev); 2062 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev); 2063 pci_free_irq_vectors(hdev->pdev); 2064 2065 gaudi->hw_cap_initialized &= ~HW_CAP_MSI; 2066 } 2067 2068 static void gaudi_init_scrambler_sram(struct hl_device *hdev) 2069 { 2070 struct gaudi_device *gaudi = hdev->asic_specific; 2071 2072 if (hdev->asic_prop.fw_security_enabled) 2073 return; 2074 2075 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 2076 CPU_BOOT_DEV_STS0_SRAM_SCR_EN) 2077 return; 2078 2079 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER) 2080 return; 2081 2082 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2083 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2084 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2085 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2086 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2087 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2088 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2089 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2090 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2091 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2092 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2093 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2094 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2095 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2096 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2097 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2098 2099 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN, 2100 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2101 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN, 2102 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2103 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN, 2104 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2105 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN, 2106 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2107 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN, 2108 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2109 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN, 2110 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2111 
WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN, 2112 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2113 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN, 2114 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT); 2115 2116 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN, 2117 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2118 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN, 2119 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2120 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN, 2121 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2122 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN, 2123 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2124 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN, 2125 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2126 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN, 2127 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2128 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN, 2129 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2130 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN, 2131 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT); 2132 2133 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER; 2134 } 2135 2136 static void gaudi_init_scrambler_hbm(struct hl_device *hdev) 2137 { 2138 struct gaudi_device *gaudi = hdev->asic_specific; 2139 2140 if (hdev->asic_prop.fw_security_enabled) 2141 return; 2142 2143 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2144 CPU_BOOT_DEV_STS0_DRAM_SCR_EN) 2145 return; 2146 2147 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER) 2148 return; 2149 2150 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN, 2151 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2152 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN, 2153 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2154 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN, 2155 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2156 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN, 2157 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2158 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN, 2159 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2160 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN, 2161 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2162 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN, 2163 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2164 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN, 2165 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2166 2167 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN, 2168 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2169 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN, 2170 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2171 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN, 2172 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2173 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN, 2174 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2175 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN, 2176 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2177 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN, 2178 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2179 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN, 2180 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2181 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN, 2182 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT); 2183 2184 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN, 2185 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2186 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN, 2187 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2188 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN, 2189 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2190 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN, 2191 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2192 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN, 2193 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2194 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN, 2195 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2196 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN, 2197 1 << 
DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2198 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN, 2199 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT); 2200 2201 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER; 2202 } 2203 2204 static void gaudi_init_e2e(struct hl_device *hdev) 2205 { 2206 if (hdev->asic_prop.fw_security_enabled) 2207 return; 2208 2209 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2210 CPU_BOOT_DEV_STS0_E2E_CRED_EN) 2211 return; 2212 2213 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3); 2214 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3); 2215 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49); 2216 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101); 2217 2218 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2219 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2220 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2221 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2222 2223 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2224 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2225 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2226 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2227 2228 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2229 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2230 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2231 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2232 2233 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2234 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2235 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2236 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2237 2238 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2239 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2240 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2241 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2242 2243 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2244 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2245 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2246 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2247 2248 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3); 2249 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3); 2250 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19); 2251 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19); 2252 2253 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3); 2254 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3); 2255 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79); 2256 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163); 2257 2258 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3); 2259 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3); 2260 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1); 2261 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39); 2262 2263 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1); 2264 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1); 2265 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1); 2266 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32); 2267 2268 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3); 2269 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3); 2270 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19); 2271 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32); 2272 2273 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3); 2274 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3); 2275 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19); 2276 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32); 2277 2278 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1); 2279 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1); 2280 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1); 2281 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32); 2282 2283 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3); 2284 
WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3); 2285 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1); 2286 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39); 2287 2288 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3); 2289 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3); 2290 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79); 2291 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79); 2292 2293 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2294 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2295 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2296 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2297 2298 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2299 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2300 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2301 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2302 2303 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2304 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2305 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2306 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2307 2308 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2309 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2310 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2311 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2312 2313 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2314 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2315 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2316 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2317 2318 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2319 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2320 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2321 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2322 2323 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3); 2324 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3); 2325 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162); 2326 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338); 2327 2328 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3); 2329 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3); 2330 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162); 2331 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338); 2332 2333 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN, 2334 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2335 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN, 2336 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2337 2338 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN, 2339 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2340 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN, 2341 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2342 2343 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN, 2344 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2345 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN, 2346 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2347 2348 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN, 2349 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2350 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN, 2351 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2352 2353 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN, 2354 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2355 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN, 2356 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2357 2358 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN, 2359 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2360 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN, 2361 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2362 2363 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN, 2364 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2365 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN, 2366 1 << 
IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2367 2368 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN, 2369 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2370 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN, 2371 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2372 2373 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN, 2374 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2375 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN, 2376 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2377 2378 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN, 2379 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2380 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN, 2381 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2382 2383 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN, 2384 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2385 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN, 2386 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2387 2388 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN, 2389 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2390 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN, 2391 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2392 2393 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN, 2394 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2395 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN, 2396 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2397 2398 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN, 2399 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2400 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN, 2401 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2402 2403 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN, 2404 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2405 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN, 2406 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2407 2408 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN, 2409 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT); 2410 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN, 2411 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT); 2412 2413 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN, 2414 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2415 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN, 2416 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2417 2418 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN, 2419 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2420 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN, 2421 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2422 2423 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN, 2424 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2425 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN, 2426 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2427 2428 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN, 2429 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2430 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN, 2431 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2432 2433 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN, 2434 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2435 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN, 2436 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2437 2438 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN, 2439 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2440 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN, 2441 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2442 2443 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN, 2444 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2445 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN, 2446 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2447 2448 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN, 2449 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT); 2450 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN, 2451 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT); 2452 } 2453 2454 static void gaudi_init_hbm_cred(struct hl_device *hdev) 2455 { 2456 u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd; 2457 2458 if (hdev->asic_prop.fw_security_enabled) 2459 return; 2460 2461 if (hdev->asic_prop.fw_bootfit_cpu_boot_dev_sts0 & 2462 CPU_BOOT_DEV_STS0_HBM_CRED_EN) 2463 return; 2464 2465 hbm0_wr = 0x33333333; 2466 hbm0_rd = 0x77777777; 2467 
hbm1_wr = 0x55555555; 2468 hbm1_rd = 0xDDDDDDDD; 2469 2470 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr); 2471 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr); 2472 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd); 2473 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd); 2474 2475 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr); 2476 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr); 2477 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd); 2478 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd); 2479 2480 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr); 2481 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr); 2482 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd); 2483 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd); 2484 2485 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr); 2486 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr); 2487 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd); 2488 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd); 2489 2490 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0, 2491 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2492 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2493 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0, 2494 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2495 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2496 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0, 2497 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2498 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2499 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0, 2500 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2501 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2502 2503 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1, 2504 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2505 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2506 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1, 2507 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2508 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2509 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1, 2510 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2511 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2512 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1, 2513 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) | 2514 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT)); 2515 } 2516 2517 static void gaudi_init_golden_registers(struct hl_device *hdev) 2518 { 2519 u32 tpc_offset; 2520 int tpc_id, i; 2521 2522 gaudi_init_e2e(hdev); 2523 gaudi_init_hbm_cred(hdev); 2524 2525 for (tpc_id = 0, tpc_offset = 0; 2526 tpc_id < TPC_NUMBER_OF_ENGINES; 2527 tpc_id++, tpc_offset += TPC_CFG_OFFSET) { 2528 /* Mask all arithmetic interrupts from TPC */ 2529 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE); 2530 /* Set 16 cache lines */ 2531 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset, 2532 ICACHE_FETCH_LINE_NUM, 2); 2533 } 2534 2535 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */ 2536 for (i = 0 ; i < 128 ; i += 8) 2537 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i); 2538 2539 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2540 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2541 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2542 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3); 2543 } 2544 2545 static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, 2546 int qman_id, dma_addr_t qman_pq_addr) 2547 { 2548 struct cpu_dyn_regs *dyn_regs = 2549 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2550 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2551 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2552 u32 q_off, dma_qm_offset; 2553 u32 dma_qm_err_cfg, irq_handler_offset; 2554 2555 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2556 2557 mtr_base_en_lo = 
lower_32_bits(CFG_BASE + 2558 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2559 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2560 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2561 so_base_en_lo = lower_32_bits(CFG_BASE + 2562 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2563 so_base_en_hi = upper_32_bits(CFG_BASE + 2564 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2565 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2566 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2567 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2568 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2569 so_base_ws_lo = lower_32_bits(CFG_BASE + 2570 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2571 so_base_ws_hi = upper_32_bits(CFG_BASE + 2572 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2573 2574 q_off = dma_qm_offset + qman_id * 4; 2575 2576 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr)); 2577 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr)); 2578 2579 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH)); 2580 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2581 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2582 2583 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, QMAN_LDMA_SIZE_OFFSET); 2584 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2585 QMAN_LDMA_SRC_OFFSET); 2586 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2587 QMAN_LDMA_DST_OFFSET); 2588 2589 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2590 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2591 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2592 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2593 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 2594 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 2595 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 2596 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 2597 2598 WREG32(mmDMA0_QM_CP_BARRIER_CFG_0 + q_off, 0x100); 2599 2600 /* The following configuration is needed only once per QMAN */ 2601 if (qman_id == 0) { 2602 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2603 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2604 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2605 2606 /* Configure RAZWI IRQ */ 2607 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2608 if (hdev->stop_on_err) 2609 dma_qm_err_cfg |= 2610 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2611 2612 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2613 2614 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2615 lower_32_bits(CFG_BASE + irq_handler_offset)); 2616 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2617 upper_32_bits(CFG_BASE + irq_handler_offset)); 2618 2619 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2620 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2621 dma_id); 2622 2623 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2624 QM_ARB_ERR_MSG_EN_MASK); 2625 2626 /* Set timeout to maximum */ 2627 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2628 2629 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2630 QMAN_EXTERNAL_MAKE_TRUSTED); 2631 2632 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2633 } 2634 } 2635 2636 static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id) 2637 { 2638 struct cpu_dyn_regs *dyn_regs = 2639 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2640 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT; 2641 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 2642 u32 irq_handler_offset; 2643 2644 /* Set to maximum possible according to physical size */ 2645 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0); 2646 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0); 2647 2648 /* WA for H/W bug H3-2116 */ 2649 WREG32(mmDMA0_CORE_LBW_MAX_OUTSTAND + dma_offset, 15); 2650 2651 /* STOP_ON bit implies no completion to operation in case of RAZWI */ 2652 if (hdev->stop_on_err) 2653 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT; 2654 2655 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg); 2656 2657 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2658 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2659 le32_to_cpu(dyn_regs->gic_dma_core_irq_ctrl); 2660 2661 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset, 2662 lower_32_bits(CFG_BASE + irq_handler_offset)); 2663 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset, 2664 upper_32_bits(CFG_BASE + irq_handler_offset)); 2665 2666 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset, 2667 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id); 2668 WREG32(mmDMA0_CORE_PROT + dma_offset, 2669 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT); 2670 /* If the channel is secured, it should be in MMU bypass mode */ 2671 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset, 2672 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT); 2673 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT); 2674 } 2675 2676 static void gaudi_enable_qman(struct hl_device *hdev, int dma_id, 2677 u32 enable_mask) 2678 { 2679 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2680 2681 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask); 2682 } 2683 2684 static void gaudi_init_pci_dma_qmans(struct hl_device *hdev) 2685 { 2686 struct gaudi_device *gaudi = hdev->asic_specific; 2687 struct hl_hw_queue *q; 2688 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0; 2689 2690 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA) 2691 return; 2692 2693 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) { 2694 dma_id = gaudi_dma_assignment[i]; 2695 /* 2696 * For queues after the CPU Q need to add 1 to get the correct 2697 * queue. 
In addition, need to add the CPU EQ and NIC IRQs in 2698 * order to get the correct MSI register. 2699 */ 2700 if (dma_id > 1) { 2701 cpu_skip = 1; 2702 nic_skip = NIC_NUMBER_OF_ENGINES; 2703 } else { 2704 cpu_skip = 0; 2705 nic_skip = 0; 2706 } 2707 2708 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2709 q_idx = 4 * dma_id + j + cpu_skip; 2710 q = &hdev->kernel_queues[q_idx]; 2711 q->cq_id = cq_id++; 2712 q->msi_vec = nic_skip + cpu_skip + msi_vec++; 2713 gaudi_init_pci_dma_qman(hdev, dma_id, j, 2714 q->bus_address); 2715 } 2716 2717 gaudi_init_dma_core(hdev, dma_id); 2718 2719 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE); 2720 } 2721 2722 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA; 2723 } 2724 2725 static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, 2726 int qman_id, u64 qman_base_addr) 2727 { 2728 struct cpu_dyn_regs *dyn_regs = 2729 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2730 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2731 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 2732 u32 dma_qm_err_cfg, irq_handler_offset; 2733 u32 q_off, dma_qm_offset; 2734 2735 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 2736 2737 mtr_base_en_lo = lower_32_bits(CFG_BASE + 2738 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2739 mtr_base_en_hi = upper_32_bits(CFG_BASE + 2740 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2741 so_base_en_lo = lower_32_bits(CFG_BASE + 2742 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2743 so_base_en_hi = upper_32_bits(CFG_BASE + 2744 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2745 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 2746 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2747 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 2748 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2749 so_base_ws_lo = lower_32_bits(CFG_BASE + 2750 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2751 so_base_ws_hi = upper_32_bits(CFG_BASE + 2752 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 2753 2754 q_off = dma_qm_offset + qman_id * 4; 2755 2756 if (qman_id < 4) { 2757 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, 2758 lower_32_bits(qman_base_addr)); 2759 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, 2760 upper_32_bits(qman_base_addr)); 2761 2762 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH)); 2763 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0); 2764 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0); 2765 2766 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2767 QMAN_CPDMA_SIZE_OFFSET); 2768 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2769 QMAN_CPDMA_SRC_OFFSET); 2770 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2771 QMAN_CPDMA_DST_OFFSET); 2772 } else { 2773 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
2774 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2775 le32_to_cpu(dyn_regs->gic_dma_qm_irq_ctrl); 2776 2777 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2778 QMAN_LDMA_SIZE_OFFSET); 2779 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2780 QMAN_LDMA_SRC_OFFSET); 2781 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2782 QMAN_LDMA_DST_OFFSET); 2783 2784 /* Configure RAZWI IRQ */ 2785 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2786 if (hdev->stop_on_err) 2787 dma_qm_err_cfg |= 2788 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2789 2790 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg); 2791 2792 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset, 2793 lower_32_bits(CFG_BASE + irq_handler_offset)); 2794 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset, 2795 upper_32_bits(CFG_BASE + irq_handler_offset)); 2796 2797 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset, 2798 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id + 2799 dma_id); 2800 2801 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, 2802 QM_ARB_ERR_MSG_EN_MASK); 2803 2804 /* Set timeout to maximum */ 2805 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); 2806 2807 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); 2808 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, 2809 QMAN_INTERNAL_MAKE_TRUSTED); 2810 } 2811 2812 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 2813 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 2814 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 2815 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 2816 2817 /* Configure DMA5 CP_MSG_BASE 2/3 for sync stream collective */ 2818 if (gaudi_dma_assignment[dma_id] == GAUDI_ENGINE_ID_DMA_5) { 2819 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 2820 mtr_base_ws_lo); 2821 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 2822 mtr_base_ws_hi); 2823 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 2824 so_base_ws_lo); 2825 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 2826 so_base_ws_hi); 2827 } 2828 } 2829 2830 static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev) 2831 { 2832 struct gaudi_device *gaudi = hdev->asic_specific; 2833 struct gaudi_internal_qman_info *q; 2834 u64 qman_base_addr; 2835 int i, j, dma_id, internal_q_index; 2836 2837 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA) 2838 return; 2839 2840 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) { 2841 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i]; 2842 2843 for (j = 0 ; j < QMAN_STREAMS ; j++) { 2844 /* 2845 * Add the CPU queue in order to get the correct queue 2846 * number as all internal queue are placed after it 2847 */ 2848 internal_q_index = dma_id * QMAN_STREAMS + j + 1; 2849 2850 q = &gaudi->internal_qmans[internal_q_index]; 2851 qman_base_addr = (u64) q->pq_dma_addr; 2852 gaudi_init_hbm_dma_qman(hdev, dma_id, j, 2853 qman_base_addr); 2854 } 2855 2856 /* Initializing lower CP for HBM DMA QMAN */ 2857 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0); 2858 2859 gaudi_init_dma_core(hdev, dma_id); 2860 2861 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE); 2862 } 2863 2864 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA; 2865 } 2866 2867 static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, 2868 int qman_id, u64 qman_base_addr) 2869 { 2870 struct cpu_dyn_regs *dyn_regs = 2871 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2872 u32 mtr_base_lo, mtr_base_hi; 2873 u32 so_base_lo, so_base_hi; 2874 u32 irq_handler_offset; 2875 u32 q_off, mme_id; 2876 
u32 mme_qm_err_cfg; 2877 2878 mtr_base_lo = lower_32_bits(CFG_BASE + 2879 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2880 mtr_base_hi = upper_32_bits(CFG_BASE + 2881 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 2882 so_base_lo = lower_32_bits(CFG_BASE + 2883 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2884 so_base_hi = upper_32_bits(CFG_BASE + 2885 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 2886 2887 q_off = mme_offset + qman_id * 4; 2888 2889 if (qman_id < 4) { 2890 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off, 2891 lower_32_bits(qman_base_addr)); 2892 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off, 2893 upper_32_bits(qman_base_addr)); 2894 2895 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH)); 2896 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0); 2897 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0); 2898 2899 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2900 QMAN_CPDMA_SIZE_OFFSET); 2901 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2902 QMAN_CPDMA_SRC_OFFSET); 2903 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2904 QMAN_CPDMA_DST_OFFSET); 2905 } else { 2906 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 2907 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 2908 le32_to_cpu(dyn_regs->gic_mme_qm_irq_ctrl); 2909 2910 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 2911 QMAN_LDMA_SIZE_OFFSET); 2912 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 2913 QMAN_LDMA_SRC_OFFSET); 2914 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 2915 QMAN_LDMA_DST_OFFSET); 2916 2917 /* Configure RAZWI IRQ */ 2918 mme_id = mme_offset / 2919 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0) / 2; 2920 2921 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 2922 if (hdev->stop_on_err) 2923 mme_qm_err_cfg |= 2924 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 2925 2926 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg); 2927 2928 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset, 2929 lower_32_bits(CFG_BASE + irq_handler_offset)); 2930 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset, 2931 upper_32_bits(CFG_BASE + irq_handler_offset)); 2932 2933 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset, 2934 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id + 2935 mme_id); 2936 2937 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, 2938 QM_ARB_ERR_MSG_EN_MASK); 2939 2940 /* Set timeout to maximum */ 2941 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); 2942 2943 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); 2944 WREG32(mmMME0_QM_GLBL_PROT + mme_offset, 2945 QMAN_INTERNAL_MAKE_TRUSTED); 2946 } 2947 2948 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo); 2949 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi); 2950 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo); 2951 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi); 2952 } 2953 2954 static void gaudi_init_mme_qmans(struct hl_device *hdev) 2955 { 2956 struct gaudi_device *gaudi = hdev->asic_specific; 2957 struct gaudi_internal_qman_info *q; 2958 u64 qman_base_addr; 2959 u32 mme_offset; 2960 int i, internal_q_index; 2961 2962 if (gaudi->hw_cap_initialized & HW_CAP_MME) 2963 return; 2964 2965 /* 2966 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE) 2967 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE) 2968 */ 2969 2970 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2971 2972 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) { 2973 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i; 2974 q = &gaudi->internal_qmans[internal_q_index]; 2975 qman_base_addr = (u64) q->pq_dma_addr; 
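/* (i & 0x3) selects the stream (qman_id 0-3) within the current MME QMAN; after the fourth stream (i == 3) mme_offset is reset below so the remaining queues map to MME0 */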
2976 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3), 2977 qman_base_addr); 2978 if (i == 3) 2979 mme_offset = 0; 2980 } 2981 2982 /* Initializing lower CP for MME QMANs */ 2983 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0; 2984 gaudi_init_mme_qman(hdev, mme_offset, 4, 0); 2985 gaudi_init_mme_qman(hdev, 0, 4, 0); 2986 2987 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2988 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE); 2989 2990 gaudi->hw_cap_initialized |= HW_CAP_MME; 2991 } 2992 2993 static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, 2994 int qman_id, u64 qman_base_addr) 2995 { 2996 struct cpu_dyn_regs *dyn_regs = 2997 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 2998 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 2999 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3000 u32 tpc_qm_err_cfg, irq_handler_offset; 3001 u32 q_off, tpc_id; 3002 3003 mtr_base_en_lo = lower_32_bits(CFG_BASE + 3004 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3005 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3006 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3007 so_base_en_lo = lower_32_bits(CFG_BASE + 3008 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3009 so_base_en_hi = upper_32_bits(CFG_BASE + 3010 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3011 mtr_base_ws_lo = lower_32_bits(CFG_BASE + 3012 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3013 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3014 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3015 so_base_ws_lo = lower_32_bits(CFG_BASE + 3016 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3017 so_base_ws_hi = upper_32_bits(CFG_BASE + 3018 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3019 3020 q_off = tpc_offset + qman_id * 4; 3021 3022 tpc_id = tpc_offset / 3023 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0); 3024 3025 if (qman_id < 4) { 3026 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off, 3027 lower_32_bits(qman_base_addr)); 3028 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off, 3029 upper_32_bits(qman_base_addr)); 3030 3031 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH)); 3032 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0); 3033 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0); 3034 3035 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3036 QMAN_CPDMA_SIZE_OFFSET); 3037 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3038 QMAN_CPDMA_SRC_OFFSET); 3039 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3040 QMAN_CPDMA_DST_OFFSET); 3041 } else { 3042 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3043 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3044 le32_to_cpu(dyn_regs->gic_tpc_qm_irq_ctrl); 3045 3046 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3047 QMAN_LDMA_SIZE_OFFSET); 3048 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3049 QMAN_LDMA_SRC_OFFSET); 3050 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3051 QMAN_LDMA_DST_OFFSET); 3052 3053 /* Configure RAZWI IRQ */ 3054 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3055 if (hdev->stop_on_err) 3056 tpc_qm_err_cfg |= 3057 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3058 3059 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg); 3060 3061 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset, 3062 lower_32_bits(CFG_BASE + irq_handler_offset)); 3063 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset, 3064 upper_32_bits(CFG_BASE + irq_handler_offset)); 3065 3066 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset, 3067 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id + 3068 tpc_id); 3069 3070 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, 3071 QM_ARB_ERR_MSG_EN_MASK); 3072 3073 /* Set timeout to maximum */ 3074 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); 3075 3076 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); 3077 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, 3078 QMAN_INTERNAL_MAKE_TRUSTED); 3079 } 3080 3081 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3082 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3083 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3084 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3085 3086 /* Configure TPC7 CP_MSG_BASE 2/3 for sync stream collective */ 3087 if (tpc_id == 6) { 3088 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, 3089 mtr_base_ws_lo); 3090 WREG32(mmTPC0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, 3091 mtr_base_ws_hi); 3092 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, 3093 so_base_ws_lo); 3094 WREG32(mmTPC0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, 3095 so_base_ws_hi); 3096 } 3097 } 3098 3099 static void gaudi_init_tpc_qmans(struct hl_device *hdev) 3100 { 3101 struct gaudi_device *gaudi = hdev->asic_specific; 3102 struct gaudi_internal_qman_info *q; 3103 u64 qman_base_addr; 3104 u32 so_base_hi, tpc_offset = 0; 3105 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH - 3106 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH; 3107 int i, tpc_id, internal_q_index; 3108 3109 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK) 3110 return; 3111 3112 so_base_hi = upper_32_bits(CFG_BASE + 3113 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3114 3115 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3116 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3117 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 + 3118 tpc_id * QMAN_STREAMS + i; 3119 q = &gaudi->internal_qmans[internal_q_index]; 3120 qman_base_addr = (u64) q->pq_dma_addr; 3121 gaudi_init_tpc_qman(hdev, tpc_offset, i, 3122 qman_base_addr); 3123 3124 if (i == 3) { 3125 /* Initializing lower CP for TPC QMAN */ 3126 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0); 3127 3128 /* Enable the QMAN and TPC channel */ 3129 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 3130 QMAN_TPC_ENABLE); 3131 } 3132 } 3133 3134 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta, 3135 so_base_hi); 3136 3137 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3138 3139 gaudi->hw_cap_initialized |= 3140 FIELD_PREP(HW_CAP_TPC_MASK, 1 << tpc_id); 3141 } 3142 } 3143 3144 static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, 3145 int qman_id, u64 qman_base_addr, int nic_id) 3146 { 3147 
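/* Per-stream NIC QMAN setup: program the PQ base/size, reset PI/CI, set the LDMA offsets, point CP_MSG_BASE 0-3 at the sync manager objects, and (for qman_id 0 only) configure error/RAZWI reporting and the arbiter watchdog */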
struct cpu_dyn_regs *dyn_regs = 3148 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3149 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi; 3150 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi; 3151 u32 nic_qm_err_cfg, irq_handler_offset; 3152 u32 q_off; 3153 3154 mtr_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3155 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3156 mtr_base_en_hi = upper_32_bits(CFG_BASE + 3157 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3158 so_base_en_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3159 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3160 so_base_en_hi = upper_32_bits(CFG_BASE + 3161 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0); 3162 mtr_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3163 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3164 mtr_base_ws_hi = upper_32_bits(CFG_BASE + 3165 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0); 3166 so_base_ws_lo = lower_32_bits((CFG_BASE & U32_MAX) + 3167 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3168 so_base_ws_hi = upper_32_bits(CFG_BASE + 3169 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0); 3170 3171 q_off = nic_offset + qman_id * 4; 3172 3173 WREG32(mmNIC0_QM0_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_base_addr)); 3174 WREG32(mmNIC0_QM0_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_base_addr)); 3175 3176 WREG32(mmNIC0_QM0_PQ_SIZE_0 + q_off, ilog2(NIC_QMAN_LENGTH)); 3177 WREG32(mmNIC0_QM0_PQ_PI_0 + q_off, 0); 3178 WREG32(mmNIC0_QM0_PQ_CI_0 + q_off, 0); 3179 3180 WREG32(mmNIC0_QM0_CP_LDMA_TSIZE_OFFSET_0 + q_off, 3181 QMAN_LDMA_SIZE_OFFSET); 3182 WREG32(mmNIC0_QM0_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 3183 QMAN_LDMA_SRC_OFFSET); 3184 WREG32(mmNIC0_QM0_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 3185 QMAN_LDMA_DST_OFFSET); 3186 3187 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo); 3188 WREG32(mmNIC0_QM0_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi); 3189 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo); 3190 WREG32(mmNIC0_QM0_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi); 3191 3192 /* Configure NIC CP_MSG_BASE 2/3 for sync stream collective */ 3193 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo); 3194 WREG32(mmNIC0_QM0_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi); 3195 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo); 3196 WREG32(mmNIC0_QM0_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi); 3197 3198 if (qman_id == 0) { 3199 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 
3200 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3201 le32_to_cpu(dyn_regs->gic_nic_qm_irq_ctrl); 3202 3203 /* Configure RAZWI IRQ */ 3204 nic_qm_err_cfg = NIC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK; 3205 if (hdev->stop_on_err) 3206 nic_qm_err_cfg |= 3207 NIC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK; 3208 3209 WREG32(mmNIC0_QM0_GLBL_ERR_CFG + nic_offset, nic_qm_err_cfg); 3210 3211 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_LO + nic_offset, 3212 lower_32_bits(CFG_BASE + irq_handler_offset)); 3213 WREG32(mmNIC0_QM0_GLBL_ERR_ADDR_HI + nic_offset, 3214 upper_32_bits(CFG_BASE + irq_handler_offset)); 3215 3216 WREG32(mmNIC0_QM0_GLBL_ERR_WDATA + nic_offset, 3217 gaudi_irq_map_table[GAUDI_EVENT_NIC0_QM0].cpu_id + 3218 nic_id); 3219 3220 WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, 3221 QM_ARB_ERR_MSG_EN_MASK); 3222 3223 /* Set timeout to maximum */ 3224 WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); 3225 3226 WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); 3227 WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, 3228 QMAN_INTERNAL_MAKE_TRUSTED); 3229 } 3230 } 3231 3232 static void gaudi_init_nic_qmans(struct hl_device *hdev) 3233 { 3234 struct gaudi_device *gaudi = hdev->asic_specific; 3235 struct gaudi_internal_qman_info *q; 3236 u64 qman_base_addr; 3237 u32 nic_offset = 0; 3238 u32 nic_delta_between_qmans = 3239 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3240 u32 nic_delta_between_nics = 3241 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3242 int i, nic_id, internal_q_index; 3243 3244 if (!hdev->nic_ports_mask) 3245 return; 3246 3247 if (gaudi->hw_cap_initialized & HW_CAP_NIC_MASK) 3248 return; 3249 3250 dev_dbg(hdev->dev, "Initializing NIC QMANs\n"); 3251 3252 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3253 if (!(hdev->nic_ports_mask & (1 << nic_id))) { 3254 nic_offset += nic_delta_between_qmans; 3255 if (nic_id & 1) { 3256 nic_offset -= (nic_delta_between_qmans * 2); 3257 nic_offset += nic_delta_between_nics; 3258 } 3259 continue; 3260 } 3261 3262 for (i = 0 ; i < QMAN_STREAMS ; i++) { 3263 internal_q_index = GAUDI_QUEUE_ID_NIC_0_0 + 3264 nic_id * QMAN_STREAMS + i; 3265 q = &gaudi->internal_qmans[internal_q_index]; 3266 qman_base_addr = (u64) q->pq_dma_addr; 3267 gaudi_init_nic_qman(hdev, nic_offset, (i & 0x3), 3268 qman_base_addr, nic_id); 3269 } 3270 3271 /* Enable the QMAN */ 3272 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, NIC_QMAN_ENABLE); 3273 3274 nic_offset += nic_delta_between_qmans; 3275 if (nic_id & 1) { 3276 nic_offset -= (nic_delta_between_qmans * 2); 3277 nic_offset += nic_delta_between_nics; 3278 } 3279 3280 gaudi->hw_cap_initialized |= 1 << (HW_CAP_NIC_SHIFT + nic_id); 3281 } 3282 } 3283 3284 static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev) 3285 { 3286 struct gaudi_device *gaudi = hdev->asic_specific; 3287 3288 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3289 return; 3290 3291 WREG32(mmDMA0_QM_GLBL_CFG0, 0); 3292 WREG32(mmDMA1_QM_GLBL_CFG0, 0); 3293 WREG32(mmDMA5_QM_GLBL_CFG0, 0); 3294 } 3295 3296 static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev) 3297 { 3298 struct gaudi_device *gaudi = hdev->asic_specific; 3299 3300 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3301 return; 3302 3303 WREG32(mmDMA2_QM_GLBL_CFG0, 0); 3304 WREG32(mmDMA3_QM_GLBL_CFG0, 0); 3305 WREG32(mmDMA4_QM_GLBL_CFG0, 0); 3306 WREG32(mmDMA6_QM_GLBL_CFG0, 0); 3307 WREG32(mmDMA7_QM_GLBL_CFG0, 0); 3308 } 3309 3310 static void gaudi_disable_mme_qmans(struct hl_device *hdev) 3311 { 3312 struct gaudi_device *gaudi = hdev->asic_specific; 3313 3314 if 
(!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3315 return; 3316 3317 WREG32(mmMME2_QM_GLBL_CFG0, 0); 3318 WREG32(mmMME0_QM_GLBL_CFG0, 0); 3319 } 3320 3321 static void gaudi_disable_tpc_qmans(struct hl_device *hdev) 3322 { 3323 struct gaudi_device *gaudi = hdev->asic_specific; 3324 u32 tpc_offset = 0; 3325 int tpc_id; 3326 3327 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3328 return; 3329 3330 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) { 3331 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0); 3332 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0; 3333 } 3334 } 3335 3336 static void gaudi_disable_nic_qmans(struct hl_device *hdev) 3337 { 3338 struct gaudi_device *gaudi = hdev->asic_specific; 3339 u32 nic_mask, nic_offset = 0; 3340 u32 nic_delta_between_qmans = 3341 mmNIC0_QM1_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3342 u32 nic_delta_between_nics = 3343 mmNIC1_QM0_GLBL_CFG0 - mmNIC0_QM0_GLBL_CFG0; 3344 int nic_id; 3345 3346 for (nic_id = 0 ; nic_id < NIC_NUMBER_OF_ENGINES ; nic_id++) { 3347 nic_mask = 1 << (HW_CAP_NIC_SHIFT + nic_id); 3348 3349 if (gaudi->hw_cap_initialized & nic_mask) 3350 WREG32(mmNIC0_QM0_GLBL_CFG0 + nic_offset, 0); 3351 3352 nic_offset += nic_delta_between_qmans; 3353 if (nic_id & 1) { 3354 nic_offset -= (nic_delta_between_qmans * 2); 3355 nic_offset += nic_delta_between_nics; 3356 } 3357 } 3358 } 3359 3360 static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev) 3361 { 3362 struct gaudi_device *gaudi = hdev->asic_specific; 3363 3364 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3365 return; 3366 3367 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */ 3368 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3369 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3370 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3371 } 3372 3373 static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev) 3374 { 3375 struct gaudi_device *gaudi = hdev->asic_specific; 3376 3377 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3378 return; 3379 3380 /* Stop CPs of HBM DMA QMANs */ 3381 3382 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3383 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3384 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3385 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3386 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3387 } 3388 3389 static void gaudi_stop_mme_qmans(struct hl_device *hdev) 3390 { 3391 struct gaudi_device *gaudi = hdev->asic_specific; 3392 3393 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3394 return; 3395 3396 /* Stop CPs of MME QMANs */ 3397 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3398 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3399 } 3400 3401 static void gaudi_stop_tpc_qmans(struct hl_device *hdev) 3402 { 3403 struct gaudi_device *gaudi = hdev->asic_specific; 3404 3405 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3406 return; 3407 3408 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3409 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3410 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3411 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3412 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3413 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3414 
WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3415 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT); 3416 } 3417 3418 static void gaudi_stop_nic_qmans(struct hl_device *hdev) 3419 { 3420 struct gaudi_device *gaudi = hdev->asic_specific; 3421 3422 /* Stop upper CPs of QMANs */ 3423 3424 if (gaudi->hw_cap_initialized & HW_CAP_NIC0) 3425 WREG32(mmNIC0_QM0_GLBL_CFG1, 3426 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3427 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3428 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3429 3430 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) 3431 WREG32(mmNIC0_QM1_GLBL_CFG1, 3432 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3433 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3434 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3435 3436 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) 3437 WREG32(mmNIC1_QM0_GLBL_CFG1, 3438 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3439 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3440 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3441 3442 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) 3443 WREG32(mmNIC1_QM1_GLBL_CFG1, 3444 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3445 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3446 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3447 3448 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) 3449 WREG32(mmNIC2_QM0_GLBL_CFG1, 3450 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3451 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3452 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3453 3454 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) 3455 WREG32(mmNIC2_QM1_GLBL_CFG1, 3456 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3457 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3458 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3459 3460 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) 3461 WREG32(mmNIC3_QM0_GLBL_CFG1, 3462 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3463 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3464 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3465 3466 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) 3467 WREG32(mmNIC3_QM1_GLBL_CFG1, 3468 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3469 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3470 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3471 3472 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) 3473 WREG32(mmNIC4_QM0_GLBL_CFG1, 3474 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3475 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3476 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3477 3478 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) 3479 WREG32(mmNIC4_QM1_GLBL_CFG1, 3480 NIC0_QM0_GLBL_CFG1_PQF_STOP_MASK | 3481 NIC0_QM0_GLBL_CFG1_CQF_STOP_MASK | 3482 NIC0_QM0_GLBL_CFG1_CP_STOP_MASK); 3483 } 3484 3485 static void gaudi_pci_dma_stall(struct hl_device *hdev) 3486 { 3487 struct gaudi_device *gaudi = hdev->asic_specific; 3488 3489 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)) 3490 return; 3491 3492 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3493 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3494 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3495 } 3496 3497 static void gaudi_hbm_dma_stall(struct hl_device *hdev) 3498 { 3499 struct gaudi_device *gaudi = hdev->asic_specific; 3500 3501 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)) 3502 return; 3503 3504 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3505 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3506 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3507 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3508 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT); 3509 } 3510 3511 static void gaudi_mme_stall(struct hl_device *hdev) 3512 { 3513 struct gaudi_device *gaudi = hdev->asic_specific; 3514 3515 if (!(gaudi->hw_cap_initialized & HW_CAP_MME)) 3516 return; 3517 
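/* Stall all four MME engines by setting the stall bit in each ACC and SBAB block */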
3518 /* WA for H3-1800 bug: do ACC and SBAB writes twice */ 3519 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3520 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3521 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3522 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3523 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3524 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3525 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3526 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3527 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3528 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3529 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3530 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3531 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3532 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT); 3533 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3534 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT); 3535 } 3536 3537 static void gaudi_tpc_stall(struct hl_device *hdev) 3538 { 3539 struct gaudi_device *gaudi = hdev->asic_specific; 3540 3541 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)) 3542 return; 3543 3544 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3545 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3546 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3547 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3548 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3549 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3550 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3551 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT); 3552 } 3553 3554 static void gaudi_disable_clock_gating(struct hl_device *hdev) 3555 { 3556 u32 qman_offset; 3557 int i; 3558 3559 if (hdev->asic_prop.fw_security_enabled) 3560 return; 3561 3562 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 3563 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0); 3564 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0); 3565 3566 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG); 3567 } 3568 3569 WREG32(mmMME0_QM_CGM_CFG, 0); 3570 WREG32(mmMME0_QM_CGM_CFG1, 0); 3571 WREG32(mmMME2_QM_CGM_CFG, 0); 3572 WREG32(mmMME2_QM_CGM_CFG1, 0); 3573 3574 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 3575 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0); 3576 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0); 3577 3578 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG); 3579 } 3580 } 3581 3582 static void gaudi_enable_timestamp(struct hl_device *hdev) 3583 { 3584 /* Disable the timestamp counter */ 3585 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3586 3587 /* Zero the lower/upper parts of the 64-bit counter */ 3588 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0); 3589 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0); 3590 3591 /* Enable the counter */ 3592 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1); 3593 } 3594 3595 static void gaudi_disable_timestamp(struct hl_device *hdev) 3596 { 3597 /* Disable the timestamp counter */ 3598 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0); 3599 } 3600 3601 static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) 3602 { 3603 u32 wait_timeout_ms; 3604 3605 if (hdev->pldm) 3606 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 3607 else 3608 
wait_timeout_ms = GAUDI_RESET_WAIT_MSEC; 3609 3610 if (fw_reset) 3611 goto skip_engines; 3612 3613 gaudi_stop_nic_qmans(hdev); 3614 gaudi_stop_mme_qmans(hdev); 3615 gaudi_stop_tpc_qmans(hdev); 3616 gaudi_stop_hbm_dma_qmans(hdev); 3617 gaudi_stop_pci_dma_qmans(hdev); 3618 3619 msleep(wait_timeout_ms); 3620 3621 gaudi_pci_dma_stall(hdev); 3622 gaudi_hbm_dma_stall(hdev); 3623 gaudi_tpc_stall(hdev); 3624 gaudi_mme_stall(hdev); 3625 3626 msleep(wait_timeout_ms); 3627 3628 gaudi_disable_nic_qmans(hdev); 3629 gaudi_disable_mme_qmans(hdev); 3630 gaudi_disable_tpc_qmans(hdev); 3631 gaudi_disable_hbm_dma_qmans(hdev); 3632 gaudi_disable_pci_dma_qmans(hdev); 3633 3634 gaudi_disable_timestamp(hdev); 3635 3636 skip_engines: 3637 gaudi_disable_msi(hdev); 3638 } 3639 3640 static int gaudi_mmu_init(struct hl_device *hdev) 3641 { 3642 struct asic_fixed_properties *prop = &hdev->asic_prop; 3643 struct gaudi_device *gaudi = hdev->asic_specific; 3644 u64 hop0_addr; 3645 int rc, i; 3646 3647 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 3648 return 0; 3649 3650 for (i = 0 ; i < prop->max_asid ; i++) { 3651 hop0_addr = prop->mmu_pgt_addr + 3652 (i * prop->dmmu.hop_table_size); 3653 3654 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr); 3655 if (rc) { 3656 dev_err(hdev->dev, 3657 "failed to set hop0 addr for asid %d\n", i); 3658 return rc; 3659 } 3660 } 3661 3662 /* init MMU cache manage page */ 3663 WREG32(mmSTLB_CACHE_INV_BASE_39_8, prop->mmu_cache_mng_addr >> 8); 3664 WREG32(mmSTLB_CACHE_INV_BASE_49_40, prop->mmu_cache_mng_addr >> 40); 3665 3666 /* mem cache invalidation */ 3667 WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1); 3668 3669 rc = hl_mmu_invalidate_cache(hdev, true, 0); 3670 if (rc) 3671 return rc; 3672 3673 WREG32(mmMMU_UP_MMU_ENABLE, 1); 3674 WREG32(mmMMU_UP_SPI_MASK, 0xF); 3675 3676 WREG32(mmSTLB_HOP_CONFIGURATION, 0x30440); 3677 3678 /* 3679 * The H/W expects the first PI after init to be 1. After wraparound 3680 * we'll write 0. 
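* Keep the driver's copy in sync here so that the first cache-invalidation request after init presents PI == 1 to the H/W.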
3681 */ 3682 gaudi->mmu_cache_inv_pi = 1; 3683 3684 gaudi->hw_cap_initialized |= HW_CAP_MMU; 3685 3686 return 0; 3687 } 3688 3689 static int gaudi_load_firmware_to_device(struct hl_device *hdev) 3690 { 3691 void __iomem *dst; 3692 3693 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET; 3694 3695 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst, 0, 0); 3696 } 3697 3698 static int gaudi_load_boot_fit_to_device(struct hl_device *hdev) 3699 { 3700 void __iomem *dst; 3701 3702 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET; 3703 3704 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst, 0, 0); 3705 } 3706 3707 static void gaudi_init_dynamic_firmware_loader(struct hl_device *hdev) 3708 { 3709 struct dynamic_fw_load_mgr *dynamic_loader; 3710 struct cpu_dyn_regs *dyn_regs; 3711 3712 dynamic_loader = &hdev->fw_loader.dynamic_loader; 3713 3714 /* 3715 * here we update initial values for few specific dynamic regs (as 3716 * before reading the first descriptor from FW those value has to be 3717 * hard-coded) in later stages of the protocol those values will be 3718 * updated automatically by reading the FW descriptor so data there 3719 * will always be up-to-date 3720 */ 3721 dyn_regs = &dynamic_loader->comm_desc.cpu_dyn_regs; 3722 dyn_regs->kmd_msg_to_cpu = 3723 cpu_to_le32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU); 3724 dyn_regs->cpu_cmd_status_to_host = 3725 cpu_to_le32(mmCPU_CMD_STATUS_TO_HOST); 3726 3727 dynamic_loader->wait_for_bl_timeout = GAUDI_WAIT_FOR_BL_TIMEOUT_USEC; 3728 } 3729 3730 static void gaudi_init_static_firmware_loader(struct hl_device *hdev) 3731 { 3732 struct static_fw_load_mgr *static_loader; 3733 3734 static_loader = &hdev->fw_loader.static_loader; 3735 3736 static_loader->preboot_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3737 static_loader->boot_fit_version_max_off = SRAM_SIZE - VERSION_MAX_LEN; 3738 static_loader->kmd_msg_to_cpu_reg = mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU; 3739 static_loader->cpu_cmd_status_to_host_reg = mmCPU_CMD_STATUS_TO_HOST; 3740 static_loader->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3741 static_loader->cpu_boot_dev_status0_reg = mmCPU_BOOT_DEV_STS0; 3742 static_loader->cpu_boot_dev_status1_reg = mmCPU_BOOT_DEV_STS1; 3743 static_loader->boot_err0_reg = mmCPU_BOOT_ERR0; 3744 static_loader->boot_err1_reg = mmCPU_BOOT_ERR1; 3745 static_loader->preboot_version_offset_reg = mmPREBOOT_VER_OFFSET; 3746 static_loader->boot_fit_version_offset_reg = mmUBOOT_VER_OFFSET; 3747 static_loader->sram_offset_mask = ~(lower_32_bits(SRAM_BASE_ADDR)); 3748 static_loader->cpu_reset_wait_msec = hdev->pldm ? 
3749 GAUDI_PLDM_RESET_WAIT_MSEC : 3750 GAUDI_CPU_RESET_WAIT_MSEC; 3751 } 3752 3753 static void gaudi_init_firmware_preload_params(struct hl_device *hdev) 3754 { 3755 struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; 3756 3757 pre_fw_load->cpu_boot_status_reg = mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS; 3758 pre_fw_load->sts_boot_dev_sts0_reg = mmCPU_BOOT_DEV_STS0; 3759 pre_fw_load->sts_boot_dev_sts1_reg = mmCPU_BOOT_DEV_STS1; 3760 pre_fw_load->boot_err0_reg = mmCPU_BOOT_ERR0; 3761 pre_fw_load->boot_err1_reg = mmCPU_BOOT_ERR1; 3762 pre_fw_load->wait_for_preboot_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3763 } 3764 3765 static void gaudi_init_firmware_loader(struct hl_device *hdev) 3766 { 3767 struct asic_fixed_properties *prop = &hdev->asic_prop; 3768 struct fw_load_mgr *fw_loader = &hdev->fw_loader; 3769 3770 /* fill common fields */ 3771 fw_loader->fw_comp_loaded = FW_TYPE_NONE; 3772 fw_loader->boot_fit_img.image_name = GAUDI_BOOT_FIT_FILE; 3773 fw_loader->linux_img.image_name = GAUDI_LINUX_FW_FILE; 3774 fw_loader->cpu_timeout = GAUDI_CPU_TIMEOUT_USEC; 3775 fw_loader->boot_fit_timeout = GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC; 3776 fw_loader->skip_bmc = !hdev->bmc_enable; 3777 fw_loader->sram_bar_id = SRAM_BAR_ID; 3778 fw_loader->dram_bar_id = HBM_BAR_ID; 3779 3780 if (prop->dynamic_fw_load) 3781 gaudi_init_dynamic_firmware_loader(hdev); 3782 else 3783 gaudi_init_static_firmware_loader(hdev); 3784 } 3785 3786 static int gaudi_init_cpu(struct hl_device *hdev) 3787 { 3788 struct gaudi_device *gaudi = hdev->asic_specific; 3789 int rc; 3790 3791 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU)) 3792 return 0; 3793 3794 if (gaudi->hw_cap_initialized & HW_CAP_CPU) 3795 return 0; 3796 3797 /* 3798 * The device CPU works with 40 bits addresses. 3799 * This register sets the extension to 50 bits. 
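* The extension value written below (hdev->cpu_pci_msb_addr) holds those extra MSBs; it is set earlier in the init flow.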
3800 */ 3801 if (!hdev->asic_prop.fw_security_enabled) 3802 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr); 3803 3804 rc = hl_fw_init_cpu(hdev); 3805 3806 if (rc) 3807 return rc; 3808 3809 gaudi->hw_cap_initialized |= HW_CAP_CPU; 3810 3811 return 0; 3812 } 3813 3814 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3815 { 3816 struct cpu_dyn_regs *dyn_regs = 3817 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 3818 struct asic_fixed_properties *prop = &hdev->asic_prop; 3819 struct gaudi_device *gaudi = hdev->asic_specific; 3820 u32 status, irq_handler_offset; 3821 struct hl_eq *eq; 3822 struct hl_hw_queue *cpu_pq = 3823 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 3824 int err; 3825 3826 if (!hdev->cpu_queues_enable) 3827 return 0; 3828 3829 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 3830 return 0; 3831 3832 eq = &hdev->event_queue; 3833 3834 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address)); 3835 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address)); 3836 3837 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address)); 3838 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address)); 3839 3840 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW, 3841 lower_32_bits(hdev->cpu_accessible_dma_address)); 3842 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH, 3843 upper_32_bits(hdev->cpu_accessible_dma_address)); 3844 3845 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES); 3846 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES); 3847 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE); 3848 3849 /* Used for EQ CI */ 3850 WREG32(mmCPU_IF_EQ_RD_OFFS, 0); 3851 3852 WREG32(mmCPU_IF_PF_PQ_PI, 0); 3853 3854 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI); 3855 3856 irq_handler_offset = prop->gic_interrupts_enable ? 3857 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 3858 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 3859 3860 WREG32(irq_handler_offset, 3861 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 3862 3863 err = hl_poll_timeout( 3864 hdev, 3865 mmCPU_IF_QUEUE_INIT, 3866 status, 3867 (status == PQ_INIT_STATUS_READY_FOR_HOST), 3868 1000, 3869 cpu_timeout); 3870 3871 if (err) { 3872 dev_err(hdev->dev, 3873 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3874 return -EIO; 3875 } 3876 3877 /* update FW application security bits */ 3878 if (prop->fw_cpu_boot_dev_sts0_valid) 3879 prop->fw_app_cpu_boot_dev_sts0 = RREG32(mmCPU_BOOT_DEV_STS0); 3880 if (prop->fw_cpu_boot_dev_sts1_valid) 3881 prop->fw_app_cpu_boot_dev_sts1 = RREG32(mmCPU_BOOT_DEV_STS1); 3882 3883 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3884 return 0; 3885 } 3886 3887 static void gaudi_pre_hw_init(struct hl_device *hdev) 3888 { 3889 /* Perform read from the device to make sure device is up */ 3890 RREG32(mmHW_STATE); 3891 3892 if (!hdev->asic_prop.fw_security_enabled) { 3893 /* Set the access through PCI bars (Linux driver only) as 3894 * secured 3895 */ 3896 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, 3897 (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK | 3898 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK)); 3899 3900 /* Perform read to flush the waiting writes to ensure 3901 * configuration was set in the device 3902 */ 3903 RREG32(mmPCIE_WRAP_LBW_PROT_OVR); 3904 } 3905 3906 /* 3907 * Let's mark in the H/W that we have reached this point. We check 3908 * this value in the reset_before_init function to understand whether 3909 * we need to reset the chip before doing H/W init. 
This register is 3910 * cleared by the H/W upon H/W reset 3911 */ 3912 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY); 3913 } 3914 3915 static int gaudi_hw_init(struct hl_device *hdev) 3916 { 3917 struct gaudi_device *gaudi = hdev->asic_specific; 3918 int rc; 3919 3920 gaudi_pre_hw_init(hdev); 3921 3922 /* If iATU is done by FW, the HBM bar ALWAYS points to DRAM_PHYS_BASE. 3923 * So we set it here and if anyone tries to move it later to 3924 * a different address, there will be an error 3925 */ 3926 if (hdev->asic_prop.iatu_done_by_fw) 3927 gaudi->hbm_bar_cur_addr = DRAM_PHYS_BASE; 3928 3929 /* 3930 * Before pushing u-boot/linux to device, need to set the hbm bar to 3931 * base address of dram 3932 */ 3933 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) { 3934 dev_err(hdev->dev, 3935 "failed to map HBM bar to DRAM base address\n"); 3936 return -EIO; 3937 } 3938 3939 rc = gaudi_init_cpu(hdev); 3940 if (rc) { 3941 dev_err(hdev->dev, "failed to initialize CPU\n"); 3942 return rc; 3943 } 3944 3945 /* In case the clock gating was enabled in preboot we need to disable 3946 * it here before touching the MME/TPC registers. 3947 */ 3948 gaudi_disable_clock_gating(hdev); 3949 3950 /* SRAM scrambler must be initialized after CPU is running from HBM */ 3951 gaudi_init_scrambler_sram(hdev); 3952 3953 /* This is here just in case we are working without CPU */ 3954 gaudi_init_scrambler_hbm(hdev); 3955 3956 gaudi_init_golden_registers(hdev); 3957 3958 rc = gaudi_mmu_init(hdev); 3959 if (rc) 3960 return rc; 3961 3962 gaudi_init_security(hdev); 3963 3964 gaudi_init_pci_dma_qmans(hdev); 3965 3966 gaudi_init_hbm_dma_qmans(hdev); 3967 3968 gaudi_init_mme_qmans(hdev); 3969 3970 gaudi_init_tpc_qmans(hdev); 3971 3972 gaudi_init_nic_qmans(hdev); 3973 3974 gaudi_enable_timestamp(hdev); 3975 3976 /* MSI must be enabled before CPU queues and NIC are initialized */ 3977 rc = gaudi_enable_msi(hdev); 3978 if (rc) 3979 goto disable_queues; 3980 3981 /* must be called after MSI was enabled */ 3982 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC); 3983 if (rc) { 3984 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n", 3985 rc); 3986 goto disable_msi; 3987 } 3988 3989 /* Perform read from the device to flush all configuration */ 3990 RREG32(mmHW_STATE); 3991 3992 return 0; 3993 3994 disable_msi: 3995 gaudi_disable_msi(hdev); 3996 disable_queues: 3997 gaudi_disable_mme_qmans(hdev); 3998 gaudi_disable_pci_dma_qmans(hdev); 3999 4000 return rc; 4001 } 4002 4003 static int gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset) 4004 { 4005 struct cpu_dyn_regs *dyn_regs = 4006 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4007 u32 status, reset_timeout_ms, cpu_timeout_ms, irq_handler_offset; 4008 struct gaudi_device *gaudi = hdev->asic_specific; 4009 bool driver_performs_reset; 4010 4011 if (!hard_reset) { 4012 dev_err(hdev->dev, "GAUDI doesn't support soft-reset\n"); 4013 return 0; 4014 } 4015 4016 if (hdev->pldm) { 4017 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC; 4018 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; 4019 } else { 4020 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC; 4021 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC; 4022 } 4023 4024 if (fw_reset) { 4025 dev_dbg(hdev->dev, 4026 "Firmware performs HARD reset, going to wait %dms\n", 4027 reset_timeout_ms); 4028 4029 goto skip_reset; 4030 } 4031 4032 driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled && 4033 !hdev->asic_prop.hard_reset_done_by_fw); 4034 4035 /* Set device to handle FLR by H/W as we 
will put the device CPU to 4036 * halt mode 4037 */ 4038 if (driver_performs_reset) 4039 WREG32(mmPCIE_AUX_FLR_CTRL, (PCIE_AUX_FLR_CTRL_HW_CTRL_MASK | 4040 PCIE_AUX_FLR_CTRL_INT_MASK_MASK)); 4041 4042 /* If linux is loaded in the device CPU we need to communicate with it 4043 * via the GIC. Otherwise, we need to use COMMS or the MSG_TO_CPU 4044 * registers in case of old F/Ws 4045 */ 4046 if (hdev->fw_loader.fw_comp_loaded & FW_TYPE_LINUX) { 4047 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4048 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4049 le32_to_cpu(dyn_regs->gic_host_halt_irq); 4050 4051 WREG32(irq_handler_offset, 4052 gaudi_irq_map_table[GAUDI_EVENT_HALT_MACHINE].cpu_id); 4053 4054 /* This is a hail-mary attempt to revive the card in the small chance that the 4055 * f/w has experienced a watchdog event, which caused it to return back to preboot. 4056 * In that case, triggering reset through GIC won't help. We need to trigger the 4057 * reset as if Linux wasn't loaded. 4058 * 4059 * We do it only if the reset cause was HB, because that would be the indication 4060 * of such an event. 4061 * 4062 * In case watchdog hasn't expired but we still got HB, then this won't do any 4063 * damage. 4064 */ 4065 if (hdev->reset_info.curr_reset_cause == HL_RESET_CAUSE_HEARTBEAT) { 4066 if (hdev->asic_prop.hard_reset_done_by_fw) 4067 hl_fw_ask_hard_reset_without_linux(hdev); 4068 else 4069 hl_fw_ask_halt_machine_without_linux(hdev); 4070 } 4071 } else { 4072 if (hdev->asic_prop.hard_reset_done_by_fw) 4073 hl_fw_ask_hard_reset_without_linux(hdev); 4074 else 4075 hl_fw_ask_halt_machine_without_linux(hdev); 4076 } 4077 4078 if (driver_performs_reset) { 4079 4080 /* Configure the reset registers. Must be done as early as 4081 * possible in case we fail during H/W initialization 4082 */ 4083 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H, 4084 (CFG_RST_H_DMA_MASK | 4085 CFG_RST_H_MME_MASK | 4086 CFG_RST_H_SM_MASK | 4087 CFG_RST_H_TPC_7_MASK)); 4088 4089 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK); 4090 4091 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H, 4092 (CFG_RST_H_HBM_MASK | 4093 CFG_RST_H_TPC_7_MASK | 4094 CFG_RST_H_NIC_MASK | 4095 CFG_RST_H_SM_MASK | 4096 CFG_RST_H_DMA_MASK | 4097 CFG_RST_H_MME_MASK | 4098 CFG_RST_H_CPU_MASK | 4099 CFG_RST_H_MMU_MASK)); 4100 4101 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L, 4102 (CFG_RST_L_IF_MASK | 4103 CFG_RST_L_PSOC_MASK | 4104 CFG_RST_L_TPC_MASK)); 4105 4106 msleep(cpu_timeout_ms); 4107 4108 /* Tell ASIC not to re-initialize PCIe */ 4109 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC); 4110 4111 /* Restart BTL/BLR upon hard-reset */ 4112 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1); 4113 4114 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 4115 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); 4116 4117 dev_dbg(hdev->dev, 4118 "Issued HARD reset command, going to wait %dms\n", 4119 reset_timeout_ms); 4120 } else { 4121 dev_dbg(hdev->dev, 4122 "Firmware performs HARD reset, going to wait %dms\n", 4123 reset_timeout_ms); 4124 } 4125 4126 skip_reset: 4127 /* 4128 * After hard reset, we can't poll the BTM_FSM register because the PSOC 4129 * itself is in reset. 
Need to wait until the reset is deasserted 4130 */ 4131 msleep(reset_timeout_ms); 4132 4133 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM); 4134 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK) { 4135 dev_err(hdev->dev, "Timeout while waiting for device to reset 0x%x\n", status); 4136 return -ETIMEDOUT; 4137 } 4138 4139 if (gaudi) { 4140 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q | HW_CAP_HBM | 4141 HW_CAP_PCI_DMA | HW_CAP_MME | HW_CAP_TPC_MASK | 4142 HW_CAP_HBM_DMA | HW_CAP_PLL | HW_CAP_NIC_MASK | 4143 HW_CAP_MMU | HW_CAP_SRAM_SCRAMBLER | 4144 HW_CAP_HBM_SCRAMBLER); 4145 4146 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat)); 4147 4148 hdev->device_cpu_is_halted = false; 4149 } 4150 return 0; 4151 } 4152 4153 static int gaudi_suspend(struct hl_device *hdev) 4154 { 4155 return hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 4156 } 4157 4158 static int gaudi_resume(struct hl_device *hdev) 4159 { 4160 return gaudi_init_iatu(hdev); 4161 } 4162 4163 static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma, 4164 void *cpu_addr, dma_addr_t dma_addr, size_t size) 4165 { 4166 int rc; 4167 4168 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4169 VM_DONTCOPY | VM_NORESERVE); 4170 4171 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4172 (dma_addr - HOST_PHYS_BASE), size); 4173 if (rc) 4174 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4175 4176 return rc; 4177 } 4178 4179 static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi) 4180 { 4181 struct cpu_dyn_regs *dyn_regs = 4182 &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs; 4183 u32 db_reg_offset, db_value, dma_qm_offset, q_off, irq_handler_offset; 4184 struct gaudi_device *gaudi = hdev->asic_specific; 4185 bool invalid_queue = false; 4186 int dma_id; 4187 4188 switch (hw_queue_id) { 4189 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3: 4190 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 4191 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4192 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4193 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4194 break; 4195 4196 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3: 4197 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 4198 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4199 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4; 4200 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4201 break; 4202 4203 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3: 4204 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1]; 4205 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4206 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4207 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4208 break; 4209 4210 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3: 4211 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2]; 4212 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4213 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4214 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4215 break; 4216 4217 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3: 4218 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3]; 4219 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4220 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4221 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4222 break; 4223 4224 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3: 4225 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4]; 4226 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4227 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4228 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4229 
break; 4230 4231 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3: 4232 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5]; 4233 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4234 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4235 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4236 break; 4237 4238 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3: 4239 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_6]; 4240 dma_qm_offset = dma_id * DMA_QMAN_OFFSET; 4241 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4; 4242 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off; 4243 break; 4244 4245 case GAUDI_QUEUE_ID_CPU_PQ: 4246 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q) 4247 db_reg_offset = mmCPU_IF_PF_PQ_PI; 4248 else 4249 invalid_queue = true; 4250 break; 4251 4252 case GAUDI_QUEUE_ID_MME_0_0: 4253 db_reg_offset = mmMME2_QM_PQ_PI_0; 4254 break; 4255 4256 case GAUDI_QUEUE_ID_MME_0_1: 4257 db_reg_offset = mmMME2_QM_PQ_PI_1; 4258 break; 4259 4260 case GAUDI_QUEUE_ID_MME_0_2: 4261 db_reg_offset = mmMME2_QM_PQ_PI_2; 4262 break; 4263 4264 case GAUDI_QUEUE_ID_MME_0_3: 4265 db_reg_offset = mmMME2_QM_PQ_PI_3; 4266 break; 4267 4268 case GAUDI_QUEUE_ID_MME_1_0: 4269 db_reg_offset = mmMME0_QM_PQ_PI_0; 4270 break; 4271 4272 case GAUDI_QUEUE_ID_MME_1_1: 4273 db_reg_offset = mmMME0_QM_PQ_PI_1; 4274 break; 4275 4276 case GAUDI_QUEUE_ID_MME_1_2: 4277 db_reg_offset = mmMME0_QM_PQ_PI_2; 4278 break; 4279 4280 case GAUDI_QUEUE_ID_MME_1_3: 4281 db_reg_offset = mmMME0_QM_PQ_PI_3; 4282 break; 4283 4284 case GAUDI_QUEUE_ID_TPC_0_0: 4285 db_reg_offset = mmTPC0_QM_PQ_PI_0; 4286 break; 4287 4288 case GAUDI_QUEUE_ID_TPC_0_1: 4289 db_reg_offset = mmTPC0_QM_PQ_PI_1; 4290 break; 4291 4292 case GAUDI_QUEUE_ID_TPC_0_2: 4293 db_reg_offset = mmTPC0_QM_PQ_PI_2; 4294 break; 4295 4296 case GAUDI_QUEUE_ID_TPC_0_3: 4297 db_reg_offset = mmTPC0_QM_PQ_PI_3; 4298 break; 4299 4300 case GAUDI_QUEUE_ID_TPC_1_0: 4301 db_reg_offset = mmTPC1_QM_PQ_PI_0; 4302 break; 4303 4304 case GAUDI_QUEUE_ID_TPC_1_1: 4305 db_reg_offset = mmTPC1_QM_PQ_PI_1; 4306 break; 4307 4308 case GAUDI_QUEUE_ID_TPC_1_2: 4309 db_reg_offset = mmTPC1_QM_PQ_PI_2; 4310 break; 4311 4312 case GAUDI_QUEUE_ID_TPC_1_3: 4313 db_reg_offset = mmTPC1_QM_PQ_PI_3; 4314 break; 4315 4316 case GAUDI_QUEUE_ID_TPC_2_0: 4317 db_reg_offset = mmTPC2_QM_PQ_PI_0; 4318 break; 4319 4320 case GAUDI_QUEUE_ID_TPC_2_1: 4321 db_reg_offset = mmTPC2_QM_PQ_PI_1; 4322 break; 4323 4324 case GAUDI_QUEUE_ID_TPC_2_2: 4325 db_reg_offset = mmTPC2_QM_PQ_PI_2; 4326 break; 4327 4328 case GAUDI_QUEUE_ID_TPC_2_3: 4329 db_reg_offset = mmTPC2_QM_PQ_PI_3; 4330 break; 4331 4332 case GAUDI_QUEUE_ID_TPC_3_0: 4333 db_reg_offset = mmTPC3_QM_PQ_PI_0; 4334 break; 4335 4336 case GAUDI_QUEUE_ID_TPC_3_1: 4337 db_reg_offset = mmTPC3_QM_PQ_PI_1; 4338 break; 4339 4340 case GAUDI_QUEUE_ID_TPC_3_2: 4341 db_reg_offset = mmTPC3_QM_PQ_PI_2; 4342 break; 4343 4344 case GAUDI_QUEUE_ID_TPC_3_3: 4345 db_reg_offset = mmTPC3_QM_PQ_PI_3; 4346 break; 4347 4348 case GAUDI_QUEUE_ID_TPC_4_0: 4349 db_reg_offset = mmTPC4_QM_PQ_PI_0; 4350 break; 4351 4352 case GAUDI_QUEUE_ID_TPC_4_1: 4353 db_reg_offset = mmTPC4_QM_PQ_PI_1; 4354 break; 4355 4356 case GAUDI_QUEUE_ID_TPC_4_2: 4357 db_reg_offset = mmTPC4_QM_PQ_PI_2; 4358 break; 4359 4360 case GAUDI_QUEUE_ID_TPC_4_3: 4361 db_reg_offset = mmTPC4_QM_PQ_PI_3; 4362 break; 4363 4364 case GAUDI_QUEUE_ID_TPC_5_0: 4365 db_reg_offset = mmTPC5_QM_PQ_PI_0; 4366 break; 4367 4368 case GAUDI_QUEUE_ID_TPC_5_1: 4369 db_reg_offset = mmTPC5_QM_PQ_PI_1; 4370 break; 4371 4372 case GAUDI_QUEUE_ID_TPC_5_2: 4373 db_reg_offset = 
mmTPC5_QM_PQ_PI_2; 4374 break; 4375 4376 case GAUDI_QUEUE_ID_TPC_5_3: 4377 db_reg_offset = mmTPC5_QM_PQ_PI_3; 4378 break; 4379 4380 case GAUDI_QUEUE_ID_TPC_6_0: 4381 db_reg_offset = mmTPC6_QM_PQ_PI_0; 4382 break; 4383 4384 case GAUDI_QUEUE_ID_TPC_6_1: 4385 db_reg_offset = mmTPC6_QM_PQ_PI_1; 4386 break; 4387 4388 case GAUDI_QUEUE_ID_TPC_6_2: 4389 db_reg_offset = mmTPC6_QM_PQ_PI_2; 4390 break; 4391 4392 case GAUDI_QUEUE_ID_TPC_6_3: 4393 db_reg_offset = mmTPC6_QM_PQ_PI_3; 4394 break; 4395 4396 case GAUDI_QUEUE_ID_TPC_7_0: 4397 db_reg_offset = mmTPC7_QM_PQ_PI_0; 4398 break; 4399 4400 case GAUDI_QUEUE_ID_TPC_7_1: 4401 db_reg_offset = mmTPC7_QM_PQ_PI_1; 4402 break; 4403 4404 case GAUDI_QUEUE_ID_TPC_7_2: 4405 db_reg_offset = mmTPC7_QM_PQ_PI_2; 4406 break; 4407 4408 case GAUDI_QUEUE_ID_TPC_7_3: 4409 db_reg_offset = mmTPC7_QM_PQ_PI_3; 4410 break; 4411 4412 case GAUDI_QUEUE_ID_NIC_0_0...GAUDI_QUEUE_ID_NIC_0_3: 4413 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC0)) 4414 invalid_queue = true; 4415 4416 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4417 db_reg_offset = mmNIC0_QM0_PQ_PI_0 + q_off; 4418 break; 4419 4420 case GAUDI_QUEUE_ID_NIC_1_0...GAUDI_QUEUE_ID_NIC_1_3: 4421 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC1)) 4422 invalid_queue = true; 4423 4424 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4425 db_reg_offset = mmNIC0_QM1_PQ_PI_0 + q_off; 4426 break; 4427 4428 case GAUDI_QUEUE_ID_NIC_2_0...GAUDI_QUEUE_ID_NIC_2_3: 4429 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC2)) 4430 invalid_queue = true; 4431 4432 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4433 db_reg_offset = mmNIC1_QM0_PQ_PI_0 + q_off; 4434 break; 4435 4436 case GAUDI_QUEUE_ID_NIC_3_0...GAUDI_QUEUE_ID_NIC_3_3: 4437 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC3)) 4438 invalid_queue = true; 4439 4440 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4441 db_reg_offset = mmNIC1_QM1_PQ_PI_0 + q_off; 4442 break; 4443 4444 case GAUDI_QUEUE_ID_NIC_4_0...GAUDI_QUEUE_ID_NIC_4_3: 4445 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC4)) 4446 invalid_queue = true; 4447 4448 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4449 db_reg_offset = mmNIC2_QM0_PQ_PI_0 + q_off; 4450 break; 4451 4452 case GAUDI_QUEUE_ID_NIC_5_0...GAUDI_QUEUE_ID_NIC_5_3: 4453 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC5)) 4454 invalid_queue = true; 4455 4456 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4457 db_reg_offset = mmNIC2_QM1_PQ_PI_0 + q_off; 4458 break; 4459 4460 case GAUDI_QUEUE_ID_NIC_6_0...GAUDI_QUEUE_ID_NIC_6_3: 4461 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC6)) 4462 invalid_queue = true; 4463 4464 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4465 db_reg_offset = mmNIC3_QM0_PQ_PI_0 + q_off; 4466 break; 4467 4468 case GAUDI_QUEUE_ID_NIC_7_0...GAUDI_QUEUE_ID_NIC_7_3: 4469 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC7)) 4470 invalid_queue = true; 4471 4472 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4473 db_reg_offset = mmNIC3_QM1_PQ_PI_0 + q_off; 4474 break; 4475 4476 case GAUDI_QUEUE_ID_NIC_8_0...GAUDI_QUEUE_ID_NIC_8_3: 4477 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC8)) 4478 invalid_queue = true; 4479 4480 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4481 db_reg_offset = mmNIC4_QM0_PQ_PI_0 + q_off; 4482 break; 4483 4484 case GAUDI_QUEUE_ID_NIC_9_0...GAUDI_QUEUE_ID_NIC_9_3: 4485 if (!(gaudi->hw_cap_initialized & HW_CAP_NIC9)) 4486 invalid_queue = true; 4487 4488 q_off = ((hw_queue_id - 1) & 0x3) * 4; 4489 db_reg_offset = mmNIC4_QM1_PQ_PI_0 + q_off; 4490 break; 4491 4492 default: 4493 invalid_queue = true; 4494 } 4495 4496 if (invalid_queue) { 4497 /* Should never get here */ 4498 dev_err(hdev->dev, "h/w queue %d is 
invalid. Can't set pi\n", 4499 hw_queue_id); 4500 return; 4501 } 4502 4503 db_value = pi; 4504 4505 /* ring the doorbell */ 4506 WREG32(db_reg_offset, db_value); 4507 4508 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4509 /* make sure device CPU will read latest data from host */ 4510 mb(); 4511 4512 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ? 4513 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR : 4514 le32_to_cpu(dyn_regs->gic_host_pi_upd_irq); 4515 4516 WREG32(irq_handler_offset, 4517 gaudi_irq_map_table[GAUDI_EVENT_PI_UPDATE].cpu_id); 4518 } 4519 } 4520 4521 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, 4522 struct hl_bd *bd) 4523 { 4524 __le64 *pbd = (__le64 *) bd; 4525 4526 /* The QMANs are on the host memory so a simple copy suffice */ 4527 pqe[0] = pbd[0]; 4528 pqe[1] = pbd[1]; 4529 } 4530 4531 static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size, 4532 dma_addr_t *dma_handle, gfp_t flags) 4533 { 4534 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size, 4535 dma_handle, flags); 4536 4537 /* Shift to the device's base physical address of host memory */ 4538 if (kernel_addr) 4539 *dma_handle += HOST_PHYS_BASE; 4540 4541 return kernel_addr; 4542 } 4543 4544 static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, 4545 void *cpu_addr, dma_addr_t dma_handle) 4546 { 4547 /* Cancel the device's base physical address of host memory */ 4548 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE; 4549 4550 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); 4551 } 4552 4553 static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) 4554 { 4555 struct asic_fixed_properties *prop = &hdev->asic_prop; 4556 u64 cur_addr = prop->dram_user_base_address; 4557 u32 chunk_size, busy; 4558 int rc, dma_id; 4559 4560 while (cur_addr < prop->dram_end_address) { 4561 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4562 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4563 4564 chunk_size = 4565 min((u64)SZ_2G, prop->dram_end_address - cur_addr); 4566 4567 dev_dbg(hdev->dev, 4568 "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", 4569 cur_addr, cur_addr + chunk_size); 4570 4571 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 4572 lower_32_bits(val)); 4573 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 4574 upper_32_bits(val)); 4575 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, 4576 lower_32_bits(cur_addr)); 4577 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, 4578 upper_32_bits(cur_addr)); 4579 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, 4580 chunk_size); 4581 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 4582 ((1 << DMA0_CORE_COMMIT_LIN_SHIFT) | 4583 (1 << DMA0_CORE_COMMIT_MEM_SET_SHIFT))); 4584 4585 cur_addr += chunk_size; 4586 4587 if (cur_addr == prop->dram_end_address) 4588 break; 4589 } 4590 4591 for (dma_id = 0 ; dma_id < DMA_NUMBER_OF_CHANNELS ; dma_id++) { 4592 u32 dma_offset = dma_id * DMA_CORE_OFFSET; 4593 4594 rc = hl_poll_timeout( 4595 hdev, 4596 mmDMA0_CORE_STS0 + dma_offset, 4597 busy, 4598 ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 4599 1000, 4600 HBM_SCRUBBING_TIMEOUT_US); 4601 4602 if (rc) { 4603 dev_err(hdev->dev, 4604 "DMA Timeout during HBM scrubbing of DMA #%d\n", 4605 dma_id); 4606 return -EIO; 4607 } 4608 } 4609 } 4610 4611 return 0; 4612 } 4613 4614 static int gaudi_scrub_device_mem(struct hl_device *hdev) 4615 { 4616 struct asic_fixed_properties *prop = &hdev->asic_prop; 4617 u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US; 4618 u64 addr, size, val = hdev->memory_scrub_val; 4619 ktime_t 
timeout; 4620 int rc = 0; 4621 4622 if (!hdev->memory_scrub) 4623 return 0; 4624 4625 timeout = ktime_add_us(ktime_get(), wait_to_idle_time); 4626 while (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) { 4627 if (ktime_compare(ktime_get(), timeout) > 0) { 4628 dev_err(hdev->dev, "waiting for idle timeout\n"); 4629 return -ETIMEDOUT; 4630 } 4631 usleep_range((1000 >> 2) + 1, 1000); 4632 } 4633 4634 /* Scrub SRAM */ 4635 addr = prop->sram_user_base_address; 4636 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4637 4638 dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4639 addr, addr + size, val); 4640 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4641 if (rc) { 4642 dev_err(hdev->dev, "Failed to clear SRAM (%d)\n", rc); 4643 return rc; 4644 } 4645 4646 /* Scrub HBM using all DMA channels in parallel */ 4647 rc = gaudi_scrub_device_dram(hdev, val); 4648 if (rc) { 4649 dev_err(hdev->dev, "Failed to clear HBM (%d)\n", rc); 4650 return rc; 4651 } 4652 4653 return 0; 4654 } 4655 4656 static void *gaudi_get_int_queue_base(struct hl_device *hdev, 4657 u32 queue_id, dma_addr_t *dma_handle, 4658 u16 *queue_len) 4659 { 4660 struct gaudi_device *gaudi = hdev->asic_specific; 4661 struct gaudi_internal_qman_info *q; 4662 4663 if (queue_id >= GAUDI_QUEUE_ID_SIZE || 4664 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) { 4665 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id); 4666 return NULL; 4667 } 4668 4669 q = &gaudi->internal_qmans[queue_id]; 4670 *dma_handle = q->pq_dma_addr; 4671 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE; 4672 4673 return q->pq_kernel_addr; 4674 } 4675 4676 static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg, 4677 u16 len, u32 timeout, u64 *result) 4678 { 4679 struct gaudi_device *gaudi = hdev->asic_specific; 4680 4681 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) { 4682 if (result) 4683 *result = 0; 4684 return 0; 4685 } 4686 4687 if (!timeout) 4688 timeout = GAUDI_MSG_TO_CPU_TIMEOUT_USEC; 4689 4690 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len, 4691 timeout, result); 4692 } 4693 4694 static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id) 4695 { 4696 struct packet_msg_prot *fence_pkt; 4697 dma_addr_t pkt_dma_addr; 4698 u32 fence_val, tmp, timeout_usec; 4699 dma_addr_t fence_dma_addr; 4700 u32 *fence_ptr; 4701 int rc; 4702 4703 if (hdev->pldm) 4704 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC; 4705 else 4706 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC; 4707 4708 fence_val = GAUDI_QMAN0_FENCE_VAL; 4709 4710 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 4711 if (!fence_ptr) { 4712 dev_err(hdev->dev, 4713 "Failed to allocate memory for H/W queue %d testing\n", 4714 hw_queue_id); 4715 return -ENOMEM; 4716 } 4717 4718 *fence_ptr = 0; 4719 4720 fence_pkt = hl_asic_dma_pool_zalloc(hdev, sizeof(struct packet_msg_prot), GFP_KERNEL, 4721 &pkt_dma_addr); 4722 if (!fence_pkt) { 4723 dev_err(hdev->dev, 4724 "Failed to allocate packet for H/W queue %d testing\n", 4725 hw_queue_id); 4726 rc = -ENOMEM; 4727 goto free_fence_ptr; 4728 } 4729 4730 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 4731 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 4732 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 4733 4734 fence_pkt->ctl = cpu_to_le32(tmp); 4735 fence_pkt->value = cpu_to_le32(fence_val); 4736 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 4737 4738 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, 4739 sizeof(struct packet_msg_prot), 
4740 pkt_dma_addr); 4741 if (rc) { 4742 dev_err(hdev->dev, 4743 "Failed to send fence packet to H/W queue %d\n", 4744 hw_queue_id); 4745 goto free_pkt; 4746 } 4747 4748 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val), 4749 1000, timeout_usec, true); 4750 4751 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id); 4752 4753 if (rc == -ETIMEDOUT) { 4754 dev_err(hdev->dev, 4755 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n", 4756 hw_queue_id, (unsigned long long) fence_dma_addr, tmp); 4757 rc = -EIO; 4758 } 4759 4760 free_pkt: 4761 hl_asic_dma_pool_free(hdev, (void *) fence_pkt, pkt_dma_addr); 4762 free_fence_ptr: 4763 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 4764 return rc; 4765 } 4766 4767 static int gaudi_test_cpu_queue(struct hl_device *hdev) 4768 { 4769 struct gaudi_device *gaudi = hdev->asic_specific; 4770 4771 /* 4772 * check capability here as send_cpu_message() won't update the result 4773 * value if no capability 4774 */ 4775 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 4776 return 0; 4777 4778 return hl_fw_test_cpu_queue(hdev); 4779 } 4780 4781 static int gaudi_test_queues(struct hl_device *hdev) 4782 { 4783 int i, rc, ret_val = 0; 4784 4785 for (i = 0 ; i < hdev->asic_prop.max_queues ; i++) { 4786 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) { 4787 rc = gaudi_test_queue(hdev, i); 4788 if (rc) 4789 ret_val = -EINVAL; 4790 } 4791 } 4792 4793 rc = gaudi_test_cpu_queue(hdev); 4794 if (rc) 4795 ret_val = -EINVAL; 4796 4797 return ret_val; 4798 } 4799 4800 static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size, 4801 gfp_t mem_flags, dma_addr_t *dma_handle) 4802 { 4803 void *kernel_addr; 4804 4805 if (size > GAUDI_DMA_POOL_BLK_SIZE) 4806 return NULL; 4807 4808 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle); 4809 4810 /* Shift to the device's base physical address of host memory */ 4811 if (kernel_addr) 4812 *dma_handle += HOST_PHYS_BASE; 4813 4814 return kernel_addr; 4815 } 4816 4817 static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr, 4818 dma_addr_t dma_addr) 4819 { 4820 /* Cancel the device's base physical address of host memory */ 4821 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE; 4822 4823 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr); 4824 } 4825 4826 static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, 4827 size_t size, dma_addr_t *dma_handle) 4828 { 4829 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 4830 } 4831 4832 static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, 4833 size_t size, void *vaddr) 4834 { 4835 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); 4836 } 4837 4838 static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) 4839 { 4840 struct scatterlist *sg, *sg_next_iter; 4841 u32 count, dma_desc_cnt; 4842 u64 len, len_next; 4843 dma_addr_t addr, addr_next; 4844 4845 dma_desc_cnt = 0; 4846 4847 for_each_sgtable_dma_sg(sgt, sg, count) { 4848 len = sg_dma_len(sg); 4849 addr = sg_dma_address(sg); 4850 4851 if (len == 0) 4852 break; 4853 4854 while ((count + 1) < sgt->nents) { 4855 sg_next_iter = sg_next(sg); 4856 len_next = sg_dma_len(sg_next_iter); 4857 addr_next = sg_dma_address(sg_next_iter); 4858 4859 if (len_next == 0) 4860 break; 4861 4862 if ((addr + len == addr_next) && 4863 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 4864 len += len_next; 4865 count++; 4866 sg = sg_next_iter; 4867 } else { 4868 break; 4869 } 4870 } 4871 4872 
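/* Each run of DMA-contiguous SG entries merged above (capped at DMA_MAX_TRANSFER_SIZE) is described by a single LIN_DMA packet in the patched CB */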
dma_desc_cnt++; 4873 } 4874 4875 return dma_desc_cnt * sizeof(struct packet_lin_dma); 4876 } 4877 4878 static int gaudi_pin_memory_before_cs(struct hl_device *hdev, 4879 struct hl_cs_parser *parser, 4880 struct packet_lin_dma *user_dma_pkt, 4881 u64 addr, enum dma_data_direction dir) 4882 { 4883 struct hl_userptr *userptr; 4884 int rc; 4885 4886 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4887 parser->job_userptr_list, &userptr)) 4888 goto already_pinned; 4889 4890 userptr = kzalloc(sizeof(*userptr), GFP_KERNEL); 4891 if (!userptr) 4892 return -ENOMEM; 4893 4894 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize), 4895 userptr); 4896 if (rc) 4897 goto free_userptr; 4898 4899 list_add_tail(&userptr->job_node, parser->job_userptr_list); 4900 4901 rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir); 4902 if (rc) { 4903 dev_err(hdev->dev, "failed to map sgt with DMA region\n"); 4904 goto unpin_memory; 4905 } 4906 4907 userptr->dma_mapped = true; 4908 userptr->dir = dir; 4909 4910 already_pinned: 4911 parser->patched_cb_size += 4912 gaudi_get_dma_desc_list_size(hdev, userptr->sgt); 4913 4914 return 0; 4915 4916 unpin_memory: 4917 list_del(&userptr->job_node); 4918 hl_unpin_host_memory(hdev, userptr); 4919 free_userptr: 4920 kfree(userptr); 4921 return rc; 4922 } 4923 4924 static int gaudi_validate_dma_pkt_host(struct hl_device *hdev, 4925 struct hl_cs_parser *parser, 4926 struct packet_lin_dma *user_dma_pkt, 4927 bool src_in_host) 4928 { 4929 enum dma_data_direction dir; 4930 bool skip_host_mem_pin = false, user_memset; 4931 u64 addr; 4932 int rc = 0; 4933 4934 user_memset = (le32_to_cpu(user_dma_pkt->ctl) & 4935 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 4936 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 4937 4938 if (src_in_host) { 4939 if (user_memset) 4940 skip_host_mem_pin = true; 4941 4942 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n"); 4943 dir = DMA_TO_DEVICE; 4944 addr = le64_to_cpu(user_dma_pkt->src_addr); 4945 } else { 4946 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n"); 4947 dir = DMA_FROM_DEVICE; 4948 addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4949 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4950 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4951 } 4952 4953 if (skip_host_mem_pin) 4954 parser->patched_cb_size += sizeof(*user_dma_pkt); 4955 else 4956 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt, 4957 addr, dir); 4958 4959 return rc; 4960 } 4961 4962 static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev, 4963 struct hl_cs_parser *parser, 4964 struct packet_lin_dma *user_dma_pkt) 4965 { 4966 bool src_in_host = false; 4967 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) & 4968 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 4969 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 4970 4971 dev_dbg(hdev->dev, "DMA packet details:\n"); 4972 dev_dbg(hdev->dev, "source == 0x%llx\n", 4973 le64_to_cpu(user_dma_pkt->src_addr)); 4974 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr); 4975 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize)); 4976 4977 /* 4978 * Special handling for DMA with size 0. 
Bypass all validations 4979 * because no transactions will be done except for WR_COMP, which 4980 * is not a security issue 4981 */ 4982 if (!le32_to_cpu(user_dma_pkt->tsize)) { 4983 parser->patched_cb_size += sizeof(*user_dma_pkt); 4984 return 0; 4985 } 4986 4987 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 4988 src_in_host = true; 4989 4990 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt, 4991 src_in_host); 4992 } 4993 4994 static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev, 4995 struct hl_cs_parser *parser, 4996 struct packet_load_and_exe *user_pkt) 4997 { 4998 u32 cfg; 4999 5000 cfg = le32_to_cpu(user_pkt->cfg); 5001 5002 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) { 5003 dev_err(hdev->dev, 5004 "User not allowed to use Load and Execute\n"); 5005 return -EPERM; 5006 } 5007 5008 parser->patched_cb_size += sizeof(struct packet_load_and_exe); 5009 5010 return 0; 5011 } 5012 5013 static int gaudi_validate_cb(struct hl_device *hdev, 5014 struct hl_cs_parser *parser, bool is_mmu) 5015 { 5016 u32 cb_parsed_length = 0; 5017 int rc = 0; 5018 5019 parser->patched_cb_size = 0; 5020 5021 /* cb_user_size is more than 0 so loop will always be executed */ 5022 while (cb_parsed_length < parser->user_cb_size) { 5023 enum packet_id pkt_id; 5024 u16 pkt_size; 5025 struct gaudi_packet *user_pkt; 5026 5027 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5028 5029 pkt_id = (enum packet_id) ( 5030 (le64_to_cpu(user_pkt->header) & 5031 PACKET_HEADER_PACKET_ID_MASK) >> 5032 PACKET_HEADER_PACKET_ID_SHIFT); 5033 5034 if (!validate_packet_id(pkt_id)) { 5035 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5036 rc = -EINVAL; 5037 break; 5038 } 5039 5040 pkt_size = gaudi_packet_sizes[pkt_id]; 5041 cb_parsed_length += pkt_size; 5042 if (cb_parsed_length > parser->user_cb_size) { 5043 dev_err(hdev->dev, 5044 "packet 0x%x is out of CB boundary\n", pkt_id); 5045 rc = -EINVAL; 5046 break; 5047 } 5048 5049 switch (pkt_id) { 5050 case PACKET_MSG_PROT: 5051 dev_err(hdev->dev, 5052 "User not allowed to use MSG_PROT\n"); 5053 rc = -EPERM; 5054 break; 5055 5056 case PACKET_CP_DMA: 5057 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5058 rc = -EPERM; 5059 break; 5060 5061 case PACKET_STOP: 5062 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5063 rc = -EPERM; 5064 break; 5065 5066 case PACKET_WREG_BULK: 5067 dev_err(hdev->dev, 5068 "User not allowed to use WREG_BULK\n"); 5069 rc = -EPERM; 5070 break; 5071 5072 case PACKET_LOAD_AND_EXE: 5073 rc = gaudi_validate_load_and_exe_pkt(hdev, parser, 5074 (struct packet_load_and_exe *) user_pkt); 5075 break; 5076 5077 case PACKET_LIN_DMA: 5078 parser->contains_dma_pkt = true; 5079 if (is_mmu) 5080 parser->patched_cb_size += pkt_size; 5081 else 5082 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser, 5083 (struct packet_lin_dma *) user_pkt); 5084 break; 5085 5086 case PACKET_WREG_32: 5087 case PACKET_MSG_LONG: 5088 case PACKET_MSG_SHORT: 5089 case PACKET_REPEAT: 5090 case PACKET_FENCE: 5091 case PACKET_NOP: 5092 case PACKET_ARB_POINT: 5093 parser->patched_cb_size += pkt_size; 5094 break; 5095 5096 default: 5097 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5098 pkt_id); 5099 rc = -EINVAL; 5100 break; 5101 } 5102 5103 if (rc) 5104 break; 5105 } 5106 5107 /* 5108 * The new CB should have space at the end for two MSG_PROT packets: 5109 * 1. Optional NOP padding for cacheline alignment 5110 * 2. A packet that will act as a completion packet 5111 * 3. 
A packet that will generate MSI interrupt 5112 */ 5113 if (parser->completion) 5114 parser->patched_cb_size += gaudi_get_patched_cb_extra_size( 5115 parser->patched_cb_size); 5116 5117 return rc; 5118 } 5119 5120 static int gaudi_patch_dma_packet(struct hl_device *hdev, 5121 struct hl_cs_parser *parser, 5122 struct packet_lin_dma *user_dma_pkt, 5123 struct packet_lin_dma *new_dma_pkt, 5124 u32 *new_dma_pkt_size) 5125 { 5126 struct hl_userptr *userptr; 5127 struct scatterlist *sg, *sg_next_iter; 5128 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl; 5129 u64 len, len_next; 5130 dma_addr_t dma_addr, dma_addr_next; 5131 u64 device_memory_addr, addr; 5132 enum dma_data_direction dir; 5133 struct sg_table *sgt; 5134 bool src_in_host = false; 5135 bool skip_host_mem_pin = false; 5136 bool user_memset; 5137 5138 ctl = le32_to_cpu(user_dma_pkt->ctl); 5139 5140 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3) 5141 src_in_host = true; 5142 5143 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >> 5144 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT; 5145 5146 if (src_in_host) { 5147 addr = le64_to_cpu(user_dma_pkt->src_addr); 5148 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr); 5149 dir = DMA_TO_DEVICE; 5150 if (user_memset) 5151 skip_host_mem_pin = true; 5152 } else { 5153 addr = le64_to_cpu(user_dma_pkt->dst_addr); 5154 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr); 5155 dir = DMA_FROM_DEVICE; 5156 } 5157 5158 if ((!skip_host_mem_pin) && 5159 (!hl_userptr_is_pinned(hdev, addr, 5160 le32_to_cpu(user_dma_pkt->tsize), 5161 parser->job_userptr_list, &userptr))) { 5162 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n", 5163 addr, user_dma_pkt->tsize); 5164 return -EFAULT; 5165 } 5166 5167 if ((user_memset) && (dir == DMA_TO_DEVICE)) { 5168 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt)); 5169 *new_dma_pkt_size = sizeof(*user_dma_pkt); 5170 return 0; 5171 } 5172 5173 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5174 5175 sgt = userptr->sgt; 5176 dma_desc_cnt = 0; 5177 5178 for_each_sgtable_dma_sg(sgt, sg, count) { 5179 len = sg_dma_len(sg); 5180 dma_addr = sg_dma_address(sg); 5181 5182 if (len == 0) 5183 break; 5184 5185 while ((count + 1) < sgt->nents) { 5186 sg_next_iter = sg_next(sg); 5187 len_next = sg_dma_len(sg_next_iter); 5188 dma_addr_next = sg_dma_address(sg_next_iter); 5189 5190 if (len_next == 0) 5191 break; 5192 5193 if ((dma_addr + len == dma_addr_next) && 5194 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) { 5195 len += len_next; 5196 count++; 5197 sg = sg_next_iter; 5198 } else { 5199 break; 5200 } 5201 } 5202 5203 ctl = le32_to_cpu(user_dma_pkt->ctl); 5204 if (likely(dma_desc_cnt)) 5205 ctl &= ~GAUDI_PKT_CTL_EB_MASK; 5206 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK; 5207 new_dma_pkt->ctl = cpu_to_le32(ctl); 5208 new_dma_pkt->tsize = cpu_to_le32(len); 5209 5210 if (dir == DMA_TO_DEVICE) { 5211 new_dma_pkt->src_addr = cpu_to_le64(dma_addr); 5212 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr); 5213 } else { 5214 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr); 5215 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr); 5216 } 5217 5218 if (!user_memset) 5219 device_memory_addr += len; 5220 dma_desc_cnt++; 5221 new_dma_pkt++; 5222 } 5223 5224 if (!dma_desc_cnt) { 5225 dev_err(hdev->dev, 5226 "Error of 0 SG entries when patching DMA packet\n"); 5227 return -EFAULT; 5228 } 5229 5230 /* Fix the last dma packet - wrcomp must be as user set it */ 5231 new_dma_pkt--; 5232 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask); 5233 5234 
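/* Report the expanded size so the caller advances by the correct amount inside the patched CB */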
*new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma); 5235 5236 return 0; 5237 } 5238 5239 static int gaudi_patch_cb(struct hl_device *hdev, 5240 struct hl_cs_parser *parser) 5241 { 5242 u32 cb_parsed_length = 0; 5243 u32 cb_patched_cur_length = 0; 5244 int rc = 0; 5245 5246 /* cb_user_size is more than 0 so loop will always be executed */ 5247 while (cb_parsed_length < parser->user_cb_size) { 5248 enum packet_id pkt_id; 5249 u16 pkt_size; 5250 u32 new_pkt_size = 0; 5251 struct gaudi_packet *user_pkt, *kernel_pkt; 5252 5253 user_pkt = parser->user_cb->kernel_address + cb_parsed_length; 5254 kernel_pkt = parser->patched_cb->kernel_address + 5255 cb_patched_cur_length; 5256 5257 pkt_id = (enum packet_id) ( 5258 (le64_to_cpu(user_pkt->header) & 5259 PACKET_HEADER_PACKET_ID_MASK) >> 5260 PACKET_HEADER_PACKET_ID_SHIFT); 5261 5262 if (!validate_packet_id(pkt_id)) { 5263 dev_err(hdev->dev, "Invalid packet id %u\n", pkt_id); 5264 rc = -EINVAL; 5265 break; 5266 } 5267 5268 pkt_size = gaudi_packet_sizes[pkt_id]; 5269 cb_parsed_length += pkt_size; 5270 if (cb_parsed_length > parser->user_cb_size) { 5271 dev_err(hdev->dev, 5272 "packet 0x%x is out of CB boundary\n", pkt_id); 5273 rc = -EINVAL; 5274 break; 5275 } 5276 5277 switch (pkt_id) { 5278 case PACKET_LIN_DMA: 5279 rc = gaudi_patch_dma_packet(hdev, parser, 5280 (struct packet_lin_dma *) user_pkt, 5281 (struct packet_lin_dma *) kernel_pkt, 5282 &new_pkt_size); 5283 cb_patched_cur_length += new_pkt_size; 5284 break; 5285 5286 case PACKET_MSG_PROT: 5287 dev_err(hdev->dev, 5288 "User not allowed to use MSG_PROT\n"); 5289 rc = -EPERM; 5290 break; 5291 5292 case PACKET_CP_DMA: 5293 dev_err(hdev->dev, "User not allowed to use CP_DMA\n"); 5294 rc = -EPERM; 5295 break; 5296 5297 case PACKET_STOP: 5298 dev_err(hdev->dev, "User not allowed to use STOP\n"); 5299 rc = -EPERM; 5300 break; 5301 5302 case PACKET_WREG_32: 5303 case PACKET_WREG_BULK: 5304 case PACKET_MSG_LONG: 5305 case PACKET_MSG_SHORT: 5306 case PACKET_REPEAT: 5307 case PACKET_FENCE: 5308 case PACKET_NOP: 5309 case PACKET_ARB_POINT: 5310 case PACKET_LOAD_AND_EXE: 5311 memcpy(kernel_pkt, user_pkt, pkt_size); 5312 cb_patched_cur_length += pkt_size; 5313 break; 5314 5315 default: 5316 dev_err(hdev->dev, "Invalid packet header 0x%x\n", 5317 pkt_id); 5318 rc = -EINVAL; 5319 break; 5320 } 5321 5322 if (rc) 5323 break; 5324 } 5325 5326 return rc; 5327 } 5328 5329 static int gaudi_parse_cb_mmu(struct hl_device *hdev, 5330 struct hl_cs_parser *parser) 5331 { 5332 u64 handle; 5333 u32 patched_cb_size; 5334 struct hl_cb *user_cb; 5335 int rc; 5336 5337 /* 5338 * The new CB should have space at the end for two MSG_PROT packets: 5339 * 1. Optional NOP padding for cacheline alignment 5340 * 2. A packet that will act as a completion packet 5341 * 3. 
A packet that will generate MSI interrupt 5342 */ 5343 if (parser->completion) 5344 parser->patched_cb_size = parser->user_cb_size + 5345 gaudi_get_patched_cb_extra_size(parser->user_cb_size); 5346 else 5347 parser->patched_cb_size = parser->user_cb_size; 5348 5349 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5350 parser->patched_cb_size, false, false, 5351 &handle); 5352 5353 if (rc) { 5354 dev_err(hdev->dev, 5355 "Failed to allocate patched CB for DMA CS %d\n", 5356 rc); 5357 return rc; 5358 } 5359 5360 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5361 /* hl_cb_get should never fail */ 5362 if (!parser->patched_cb) { 5363 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5364 rc = -EFAULT; 5365 goto out; 5366 } 5367 5368 /* 5369 * We are protected from overflow because the check 5370 * "parser->user_cb_size <= parser->user_cb->size" was done in get_cb_from_cs_chunk() 5371 * in the common code. That check is done only if is_kernel_allocated_cb is true. 5372 * 5373 * There is no option to reach here without going through that check because: 5374 * 1. validate_queue_index() assigns true to is_kernel_allocated_cb for any submission to 5375 * an external queue. 5376 * 2. For Gaudi, we only parse CBs that were submitted to the external queues. 5377 */ 5378 memcpy(parser->patched_cb->kernel_address, 5379 parser->user_cb->kernel_address, 5380 parser->user_cb_size); 5381 5382 patched_cb_size = parser->patched_cb_size; 5383 5384 /* Validate patched CB instead of user CB */ 5385 user_cb = parser->user_cb; 5386 parser->user_cb = parser->patched_cb; 5387 rc = gaudi_validate_cb(hdev, parser, true); 5388 parser->user_cb = user_cb; 5389 5390 if (rc) { 5391 hl_cb_put(parser->patched_cb); 5392 goto out; 5393 } 5394 5395 if (patched_cb_size != parser->patched_cb_size) { 5396 dev_err(hdev->dev, "user CB size mismatch\n"); 5397 hl_cb_put(parser->patched_cb); 5398 rc = -EINVAL; 5399 goto out; 5400 } 5401 5402 out: 5403 /* 5404 * Always call cb destroy here because we still have 1 reference 5405 * to it by calling cb_get earlier. After the job will be completed, 5406 * cb_put will release it, but here we want to remove it from the 5407 * idr 5408 */ 5409 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5410 5411 return rc; 5412 } 5413 5414 static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, 5415 struct hl_cs_parser *parser) 5416 { 5417 u64 handle; 5418 int rc; 5419 5420 rc = gaudi_validate_cb(hdev, parser, false); 5421 5422 if (rc) 5423 goto free_userptr; 5424 5425 rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, 5426 parser->patched_cb_size, false, false, 5427 &handle); 5428 if (rc) { 5429 dev_err(hdev->dev, 5430 "Failed to allocate patched CB for DMA CS %d\n", rc); 5431 goto free_userptr; 5432 } 5433 5434 parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); 5435 /* hl_cb_get should never fail here */ 5436 if (!parser->patched_cb) { 5437 dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); 5438 rc = -EFAULT; 5439 goto out; 5440 } 5441 5442 rc = gaudi_patch_cb(hdev, parser); 5443 5444 if (rc) 5445 hl_cb_put(parser->patched_cb); 5446 5447 out: 5448 /* 5449 * Always call cb destroy here because we still have 1 reference 5450 * to it by calling cb_get earlier. 
After the job will be completed, 5451 * cb_put will release it, but here we want to remove it from the 5452 * idr 5453 */ 5454 hl_cb_destroy(&hdev->kernel_mem_mgr, handle); 5455 5456 free_userptr: 5457 if (rc) 5458 hl_userptr_delete_list(hdev, parser->job_userptr_list); 5459 return rc; 5460 } 5461 5462 static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev, 5463 struct hl_cs_parser *parser) 5464 { 5465 struct asic_fixed_properties *asic_prop = &hdev->asic_prop; 5466 struct gaudi_device *gaudi = hdev->asic_specific; 5467 u32 nic_queue_offset, nic_mask_q_id; 5468 5469 if ((parser->hw_queue_id >= GAUDI_QUEUE_ID_NIC_0_0) && 5470 (parser->hw_queue_id <= GAUDI_QUEUE_ID_NIC_9_3)) { 5471 nic_queue_offset = parser->hw_queue_id - GAUDI_QUEUE_ID_NIC_0_0; 5472 nic_mask_q_id = 1 << (HW_CAP_NIC_SHIFT + (nic_queue_offset >> 2)); 5473 5474 if (!(gaudi->hw_cap_initialized & nic_mask_q_id)) { 5475 dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 5476 return -EINVAL; 5477 } 5478 } 5479 5480 /* For internal queue jobs just check if CB address is valid */ 5481 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5482 parser->user_cb_size, 5483 asic_prop->sram_user_base_address, 5484 asic_prop->sram_end_address)) 5485 return 0; 5486 5487 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5488 parser->user_cb_size, 5489 asic_prop->dram_user_base_address, 5490 asic_prop->dram_end_address)) 5491 return 0; 5492 5493 /* PMMU and HPMMU addresses are equal, check only one of them */ 5494 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb, 5495 parser->user_cb_size, 5496 asic_prop->pmmu.start_addr, 5497 asic_prop->pmmu.end_addr)) 5498 return 0; 5499 5500 dev_err(hdev->dev, 5501 "CB address 0x%px + 0x%x for internal QMAN is not valid\n", 5502 parser->user_cb, parser->user_cb_size); 5503 5504 return -EFAULT; 5505 } 5506 5507 static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser) 5508 { 5509 struct gaudi_device *gaudi = hdev->asic_specific; 5510 5511 if (parser->queue_type == QUEUE_TYPE_INT) 5512 return gaudi_parse_cb_no_ext_queue(hdev, parser); 5513 5514 if (gaudi->hw_cap_initialized & HW_CAP_MMU) 5515 return gaudi_parse_cb_mmu(hdev, parser); 5516 else 5517 return gaudi_parse_cb_no_mmu(hdev, parser); 5518 } 5519 5520 static void gaudi_add_end_of_cb_packets(struct hl_device *hdev, void *kernel_address, 5521 u32 len, u32 original_len, u64 cq_addr, u32 cq_val, 5522 u32 msi_vec, bool eb) 5523 { 5524 struct packet_msg_prot *cq_pkt; 5525 struct packet_nop *cq_padding; 5526 u64 msi_addr; 5527 u32 tmp; 5528 5529 cq_padding = kernel_address + original_len; 5530 cq_pkt = kernel_address + len - (sizeof(struct packet_msg_prot) * 2); 5531 5532 while ((void *)cq_padding < (void *)cq_pkt) { 5533 cq_padding->ctl = cpu_to_le32(FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_NOP)); 5534 cq_padding++; 5535 } 5536 5537 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5538 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5539 5540 if (eb) 5541 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5542 5543 cq_pkt->ctl = cpu_to_le32(tmp); 5544 cq_pkt->value = cpu_to_le32(cq_val); 5545 cq_pkt->addr = cpu_to_le64(cq_addr); 5546 5547 cq_pkt++; 5548 5549 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 5550 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5551 cq_pkt->ctl = cpu_to_le32(tmp); 5552 cq_pkt->value = cpu_to_le32(1); 5553 msi_addr = hdev->pdev ? 
mmPCIE_CORE_MSI_REQ : mmPCIE_MSI_INTR_0 + msi_vec * 4; 5554 cq_pkt->addr = cpu_to_le64(CFG_BASE + msi_addr); 5555 } 5556 5557 static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val) 5558 { 5559 WREG32(mmCPU_IF_EQ_RD_OFFS, val); 5560 } 5561 5562 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, 5563 u32 size, u64 val) 5564 { 5565 struct packet_lin_dma *lin_dma_pkt; 5566 struct hl_cs_job *job; 5567 u32 cb_size, ctl, err_cause; 5568 struct hl_cb *cb; 5569 int rc; 5570 5571 cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); 5572 if (!cb) 5573 return -EFAULT; 5574 5575 lin_dma_pkt = cb->kernel_address; 5576 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt)); 5577 cb_size = sizeof(*lin_dma_pkt); 5578 5579 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA); 5580 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK, 1); 5581 ctl |= FIELD_PREP(GAUDI_PKT_LIN_DMA_CTL_LIN_MASK, 1); 5582 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5583 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5584 5585 lin_dma_pkt->ctl = cpu_to_le32(ctl); 5586 lin_dma_pkt->src_addr = cpu_to_le64(val); 5587 lin_dma_pkt->dst_addr |= cpu_to_le64(addr); 5588 lin_dma_pkt->tsize = cpu_to_le32(size); 5589 5590 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5591 if (!job) { 5592 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5593 rc = -ENOMEM; 5594 goto release_cb; 5595 } 5596 5597 /* Verify DMA is OK */ 5598 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5599 if (err_cause && !hdev->init_done) { 5600 dev_dbg(hdev->dev, 5601 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5602 err_cause); 5603 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5604 } 5605 5606 job->id = 0; 5607 job->user_cb = cb; 5608 atomic_inc(&job->user_cb->cs_cnt); 5609 job->user_cb_size = cb_size; 5610 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5611 job->patched_cb = job->user_cb; 5612 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot); 5613 5614 hl_debugfs_add_job(hdev, job); 5615 5616 rc = gaudi_send_job_on_qman0(hdev, job); 5617 hl_debugfs_remove_job(hdev, job); 5618 kfree(job); 5619 atomic_dec(&cb->cs_cnt); 5620 5621 /* Verify DMA is OK */ 5622 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE); 5623 if (err_cause) { 5624 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5625 rc = -EIO; 5626 if (!hdev->init_done) { 5627 dev_dbg(hdev->dev, 5628 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5629 err_cause); 5630 WREG32(mmDMA0_CORE_ERR_CAUSE, err_cause); 5631 } 5632 } 5633 5634 release_cb: 5635 hl_cb_put(cb); 5636 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5637 5638 return rc; 5639 } 5640 5641 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, 5642 u32 num_regs, u32 val) 5643 { 5644 struct packet_msg_long *pkt; 5645 struct hl_cs_job *job; 5646 u32 cb_size, ctl; 5647 struct hl_cb *cb; 5648 int i, rc; 5649 5650 cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot); 5651 5652 if (cb_size > SZ_2M) { 5653 dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M); 5654 return -ENOMEM; 5655 } 5656 5657 cb = hl_cb_kernel_create(hdev, cb_size, false); 5658 if (!cb) 5659 return -EFAULT; 5660 5661 pkt = cb->kernel_address; 5662 5663 ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */ 5664 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG); 5665 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 5666 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 5667 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 5668 5669 for (i = 0; i < num_regs ; i++, pkt++) { 
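/*
 * Each iteration below emits one MSG_LONG packet that writes 'val' to a
 * single 4-byte register, walking reg_base, reg_base + 4, reg_base + 8, ...
 * As a hypothetical example, a call such as
 * gaudi_memset_registers(hdev, CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0, 4, 0)
 * would produce four MSG_LONG packets clearing the first four SOB objects,
 * followed by room for the single MSG_PROT fence packet that
 * gaudi_send_job_on_qman0() fills in (already accounted for in cb_size above).
 */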
5670 pkt->ctl = cpu_to_le32(ctl); 5671 pkt->value = cpu_to_le32(val); 5672 pkt->addr = cpu_to_le64(reg_base + (i * 4)); 5673 } 5674 5675 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); 5676 if (!job) { 5677 dev_err(hdev->dev, "Failed to allocate a new job\n"); 5678 rc = -ENOMEM; 5679 goto release_cb; 5680 } 5681 5682 job->id = 0; 5683 job->user_cb = cb; 5684 atomic_inc(&job->user_cb->cs_cnt); 5685 job->user_cb_size = cb_size; 5686 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0; 5687 job->patched_cb = job->user_cb; 5688 job->job_cb_size = cb_size; 5689 5690 hl_debugfs_add_job(hdev, job); 5691 5692 rc = gaudi_send_job_on_qman0(hdev, job); 5693 hl_debugfs_remove_job(hdev, job); 5694 kfree(job); 5695 atomic_dec(&cb->cs_cnt); 5696 5697 release_cb: 5698 hl_cb_put(cb); 5699 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 5700 5701 return rc; 5702 } 5703 5704 static int gaudi_restore_sm_registers(struct hl_device *hdev) 5705 { 5706 u64 base_addr; 5707 u32 num_regs; 5708 int rc; 5709 5710 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5711 num_regs = NUM_OF_SOB_IN_BLOCK; 5712 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5713 if (rc) { 5714 dev_err(hdev->dev, "failed resetting SM registers"); 5715 return -ENOMEM; 5716 } 5717 5718 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0; 5719 num_regs = NUM_OF_SOB_IN_BLOCK; 5720 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5721 if (rc) { 5722 dev_err(hdev->dev, "failed resetting SM registers"); 5723 return -ENOMEM; 5724 } 5725 5726 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5727 num_regs = NUM_OF_SOB_IN_BLOCK; 5728 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5729 if (rc) { 5730 dev_err(hdev->dev, "failed resetting SM registers"); 5731 return -ENOMEM; 5732 } 5733 5734 base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5735 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5736 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5737 if (rc) { 5738 dev_err(hdev->dev, "failed resetting SM registers"); 5739 return -ENOMEM; 5740 } 5741 5742 base_addr = CFG_BASE + mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0; 5743 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5744 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5745 if (rc) { 5746 dev_err(hdev->dev, "failed resetting SM registers"); 5747 return -ENOMEM; 5748 } 5749 5750 base_addr = CFG_BASE + mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0; 5751 num_regs = NUM_OF_MONITORS_IN_BLOCK; 5752 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5753 if (rc) { 5754 dev_err(hdev->dev, "failed resetting SM registers"); 5755 return -ENOMEM; 5756 } 5757 5758 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5759 (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4); 5760 num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT; 5761 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5762 if (rc) { 5763 dev_err(hdev->dev, "failed resetting SM registers"); 5764 return -ENOMEM; 5765 } 5766 5767 base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + 5768 (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4); 5769 num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR; 5770 rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0); 5771 if (rc) { 5772 dev_err(hdev->dev, "failed resetting SM registers"); 5773 return -ENOMEM; 5774 } 5775 5776 return 0; 5777 } 5778 5779 static void gaudi_restore_dma_registers(struct hl_device *hdev) 5780 { 5781 u32 sob_delta = 
mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 - 5782 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0; 5783 int i; 5784 5785 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5786 u64 sob_addr = CFG_BASE + 5787 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + 5788 (i * sob_delta); 5789 u32 dma_offset = i * DMA_CORE_OFFSET; 5790 5791 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset, 5792 lower_32_bits(sob_addr)); 5793 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset, 5794 upper_32_bits(sob_addr)); 5795 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001); 5796 5797 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be 5798 * modified by the user for SRAM reduction 5799 */ 5800 if (i > 1) 5801 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset, 5802 0x00000001); 5803 } 5804 } 5805 5806 static void gaudi_restore_qm_registers(struct hl_device *hdev) 5807 { 5808 u32 qman_offset; 5809 int i; 5810 5811 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) { 5812 qman_offset = i * DMA_QMAN_OFFSET; 5813 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0); 5814 } 5815 5816 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) { 5817 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE); 5818 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0); 5819 } 5820 5821 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 5822 qman_offset = i * TPC_QMAN_OFFSET; 5823 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0); 5824 } 5825 5826 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 5827 qman_offset = (i >> 1) * NIC_MACRO_QMAN_OFFSET + 5828 (i & 0x1) * NIC_ENGINE_QMAN_OFFSET; 5829 WREG32(mmNIC0_QM0_ARB_CFG_0 + qman_offset, 0); 5830 } 5831 } 5832 5833 static int gaudi_restore_user_registers(struct hl_device *hdev) 5834 { 5835 int rc; 5836 5837 rc = gaudi_restore_sm_registers(hdev); 5838 if (rc) 5839 return rc; 5840 5841 gaudi_restore_dma_registers(hdev); 5842 gaudi_restore_qm_registers(hdev); 5843 5844 return 0; 5845 } 5846 5847 static int gaudi_context_switch(struct hl_device *hdev, u32 asid) 5848 { 5849 return 0; 5850 } 5851 5852 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev) 5853 { 5854 u32 size = hdev->asic_prop.mmu_pgt_size + 5855 hdev->asic_prop.mmu_cache_mng_size; 5856 struct gaudi_device *gaudi = hdev->asic_specific; 5857 u64 addr = hdev->asic_prop.mmu_pgt_addr; 5858 5859 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 5860 return 0; 5861 5862 return gaudi_memset_device_memory(hdev, addr, size, 0); 5863 } 5864 5865 static void gaudi_restore_phase_topology(struct hl_device *hdev) 5866 { 5867 5868 } 5869 5870 static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, 5871 u32 size_to_dma, dma_addr_t dma_addr) 5872 { 5873 u32 err_cause, val; 5874 u64 dma_offset; 5875 int rc; 5876 5877 dma_offset = dma_id * DMA_CORE_OFFSET; 5878 5879 WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, lower_32_bits(addr)); 5880 WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, upper_32_bits(addr)); 5881 WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(dma_addr)); 5882 WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, upper_32_bits(dma_addr)); 5883 WREG32(mmDMA0_CORE_DST_TSIZE_0 + dma_offset, size_to_dma); 5884 WREG32(mmDMA0_CORE_COMMIT + dma_offset, 5885 (1 << DMA0_CORE_COMMIT_LIN_SHIFT)); 5886 5887 rc = hl_poll_timeout( 5888 hdev, 5889 mmDMA0_CORE_STS0 + dma_offset, 5890 val, 5891 ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), 5892 0, 5893 1000000); 5894 5895 if (rc) { 5896 dev_err(hdev->dev, 5897 "DMA %d timed-out during reading of 0x%llx\n", 5898 dma_id, addr); 5899 return -EIO; 5900 } 5901 5902 /* Verify DMA is OK */ 5903 err_cause = 
RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5904 if (err_cause) { 5905 dev_err(hdev->dev, "DMA Failed, cause 0x%x\n", err_cause); 5906 dev_dbg(hdev->dev, 5907 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5908 err_cause); 5909 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5910 5911 return -EIO; 5912 } 5913 5914 return 0; 5915 } 5916 5917 static int gaudi_debugfs_read_dma(struct hl_device *hdev, u64 addr, u32 size, 5918 void *blob_addr) 5919 { 5920 u32 dma_core_sts0, err_cause, cfg1, size_left, pos, size_to_dma; 5921 u32 qm_glbl_sts0, qm_cgm_sts; 5922 u64 dma_offset, qm_offset; 5923 dma_addr_t dma_addr; 5924 void *kernel_addr; 5925 bool is_eng_idle; 5926 int rc = 0, dma_id; 5927 5928 kernel_addr = hl_asic_dma_alloc_coherent(hdev, SZ_2M, &dma_addr, GFP_KERNEL | __GFP_ZERO); 5929 5930 if (!kernel_addr) 5931 return -ENOMEM; 5932 5933 hdev->asic_funcs->hw_queues_lock(hdev); 5934 5935 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1]; 5936 dma_offset = dma_id * DMA_CORE_OFFSET; 5937 qm_offset = dma_id * DMA_QMAN_OFFSET; 5938 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5939 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5940 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5941 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5942 IS_DMA_IDLE(dma_core_sts0); 5943 5944 if (!is_eng_idle) { 5945 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2]; 5946 dma_offset = dma_id * DMA_CORE_OFFSET; 5947 qm_offset = dma_id * DMA_QMAN_OFFSET; 5948 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + dma_offset); 5949 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + qm_offset); 5950 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + qm_offset); 5951 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 5952 IS_DMA_IDLE(dma_core_sts0); 5953 5954 if (!is_eng_idle) { 5955 dev_err_ratelimited(hdev->dev, 5956 "Can't read via DMA because it is BUSY\n"); 5957 rc = -EAGAIN; 5958 goto out; 5959 } 5960 } 5961 5962 cfg1 = RREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset); 5963 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, 5964 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT); 5965 5966 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 5967 * using the compute ctx ASID, if exists. If not, use the kernel ctx 5968 * ASID 5969 */ 5970 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT)); 5971 5972 /* Verify DMA is OK */ 5973 err_cause = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 5974 if (err_cause) { 5975 dev_dbg(hdev->dev, 5976 "Clearing DMA0 engine from errors (cause 0x%x)\n", 5977 err_cause); 5978 WREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset, err_cause); 5979 } 5980 5981 pos = 0; 5982 size_left = size; 5983 size_to_dma = SZ_2M; 5984 5985 while (size_left > 0) { 5986 5987 if (size_left < SZ_2M) 5988 size_to_dma = size_left; 5989 5990 rc = gaudi_dma_core_transfer(hdev, dma_id, addr, size_to_dma, 5991 dma_addr); 5992 if (rc) 5993 break; 5994 5995 memcpy(blob_addr + pos, kernel_addr, size_to_dma); 5996 5997 if (size_left <= SZ_2M) 5998 break; 5999 6000 pos += SZ_2M; 6001 addr += SZ_2M; 6002 size_left -= SZ_2M; 6003 } 6004 6005 /* TODO: remove this by mapping the DMA temporary buffer to the MMU 6006 * using the compute ctx ASID, if exists. 
If not, use the kernel ctx 6007 * ASID 6008 */ 6009 WREG32_AND(mmDMA0_CORE_PROT + dma_offset, 6010 ~BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6011 6012 WREG32(mmDMA0_QM_GLBL_CFG1 + qm_offset, cfg1); 6013 6014 out: 6015 hdev->asic_funcs->hw_queues_unlock(hdev); 6016 6017 hl_asic_dma_free_coherent(hdev, SZ_2M, kernel_addr, dma_addr); 6018 6019 return rc; 6020 } 6021 6022 static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr) 6023 { 6024 struct gaudi_device *gaudi = hdev->asic_specific; 6025 6026 if (hdev->reset_info.hard_reset_pending) 6027 return U64_MAX; 6028 6029 return readq(hdev->pcie_bar[HBM_BAR_ID] + 6030 (addr - gaudi->hbm_bar_cur_addr)); 6031 } 6032 6033 static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val) 6034 { 6035 struct gaudi_device *gaudi = hdev->asic_specific; 6036 6037 if (hdev->reset_info.hard_reset_pending) 6038 return; 6039 6040 writeq(val, hdev->pcie_bar[HBM_BAR_ID] + 6041 (addr - gaudi->hbm_bar_cur_addr)); 6042 } 6043 6044 void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid) 6045 { 6046 /* mask to zero the MMBP and ASID bits */ 6047 WREG32_AND(reg, ~0x7FF); 6048 WREG32_OR(reg, asid); 6049 } 6050 6051 static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid) 6052 { 6053 struct gaudi_device *gaudi = hdev->asic_specific; 6054 6055 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6056 return; 6057 6058 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) { 6059 dev_crit(hdev->dev, "asid %u is too big\n", asid); 6060 return; 6061 } 6062 6063 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6064 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6065 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6066 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6067 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6068 6069 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6070 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6071 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6072 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6073 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6074 6075 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6076 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6077 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6078 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6079 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6080 6081 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6082 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6083 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6084 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6085 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6086 6087 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6088 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6089 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6090 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6091 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6092 6093 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6094 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, 
asid); 6095 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6096 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6097 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6098 6099 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6100 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6101 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6102 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6103 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6104 6105 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6106 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6107 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6108 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6109 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6110 6111 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid); 6112 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid); 6113 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid); 6114 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid); 6115 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid); 6116 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid); 6117 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid); 6118 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid); 6119 6120 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6121 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6122 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6123 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6124 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6125 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid); 6126 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid); 6127 6128 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid); 6129 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid); 6130 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid); 6131 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid); 6132 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid); 6133 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid); 6134 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid); 6135 6136 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6137 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6138 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6139 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6140 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6141 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid); 6142 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid); 6143 6144 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid); 6145 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid); 6146 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid); 6147 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid); 6148 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid); 6149 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid); 6150 gaudi_mmu_prepare_reg(hdev, 
mmTPC3_CFG_AWUSER_LO, asid); 6151 6152 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid); 6153 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid); 6154 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid); 6155 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid); 6156 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid); 6157 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid); 6158 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid); 6159 6160 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid); 6161 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid); 6162 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid); 6163 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid); 6164 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid); 6165 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid); 6166 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid); 6167 6168 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid); 6169 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid); 6170 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid); 6171 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid); 6172 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid); 6173 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid); 6174 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid); 6175 6176 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid); 6177 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid); 6178 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid); 6179 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid); 6180 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid); 6181 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid); 6182 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid); 6183 6184 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid); 6185 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid); 6186 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid); 6187 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid); 6188 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid); 6189 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid); 6190 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid); 6191 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid); 6192 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid); 6193 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid); 6194 6195 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid); 6196 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid); 6197 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid); 6198 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid); 6199 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid); 6200 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid); 6201 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid); 6202 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid); 6203 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid); 6204 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid); 6205 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid); 6206 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid); 6207 6208 if 
(gaudi->hw_cap_initialized & HW_CAP_NIC0) { 6209 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_0, 6210 asid); 6211 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_1, 6212 asid); 6213 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_2, 6214 asid); 6215 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_3, 6216 asid); 6217 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM0_GLBL_NON_SECURE_PROPS_4, 6218 asid); 6219 } 6220 6221 if (gaudi->hw_cap_initialized & HW_CAP_NIC1) { 6222 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_0, 6223 asid); 6224 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_1, 6225 asid); 6226 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_2, 6227 asid); 6228 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_3, 6229 asid); 6230 gaudi_mmu_prepare_reg(hdev, mmNIC0_QM1_GLBL_NON_SECURE_PROPS_4, 6231 asid); 6232 } 6233 6234 if (gaudi->hw_cap_initialized & HW_CAP_NIC2) { 6235 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_0, 6236 asid); 6237 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_1, 6238 asid); 6239 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_2, 6240 asid); 6241 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_3, 6242 asid); 6243 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM0_GLBL_NON_SECURE_PROPS_4, 6244 asid); 6245 } 6246 6247 if (gaudi->hw_cap_initialized & HW_CAP_NIC3) { 6248 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_0, 6249 asid); 6250 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_1, 6251 asid); 6252 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_2, 6253 asid); 6254 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_3, 6255 asid); 6256 gaudi_mmu_prepare_reg(hdev, mmNIC1_QM1_GLBL_NON_SECURE_PROPS_4, 6257 asid); 6258 } 6259 6260 if (gaudi->hw_cap_initialized & HW_CAP_NIC4) { 6261 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_0, 6262 asid); 6263 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_1, 6264 asid); 6265 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_2, 6266 asid); 6267 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_3, 6268 asid); 6269 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM0_GLBL_NON_SECURE_PROPS_4, 6270 asid); 6271 } 6272 6273 if (gaudi->hw_cap_initialized & HW_CAP_NIC5) { 6274 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_0, 6275 asid); 6276 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_1, 6277 asid); 6278 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_2, 6279 asid); 6280 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_3, 6281 asid); 6282 gaudi_mmu_prepare_reg(hdev, mmNIC2_QM1_GLBL_NON_SECURE_PROPS_4, 6283 asid); 6284 } 6285 6286 if (gaudi->hw_cap_initialized & HW_CAP_NIC6) { 6287 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_0, 6288 asid); 6289 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_1, 6290 asid); 6291 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_2, 6292 asid); 6293 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_3, 6294 asid); 6295 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM0_GLBL_NON_SECURE_PROPS_4, 6296 asid); 6297 } 6298 6299 if (gaudi->hw_cap_initialized & HW_CAP_NIC7) { 6300 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_0, 6301 asid); 6302 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_1, 6303 asid); 6304 gaudi_mmu_prepare_reg(hdev, 
mmNIC3_QM1_GLBL_NON_SECURE_PROPS_2, 6305 asid); 6306 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_3, 6307 asid); 6308 gaudi_mmu_prepare_reg(hdev, mmNIC3_QM1_GLBL_NON_SECURE_PROPS_4, 6309 asid); 6310 } 6311 6312 if (gaudi->hw_cap_initialized & HW_CAP_NIC8) { 6313 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_0, 6314 asid); 6315 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_1, 6316 asid); 6317 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_2, 6318 asid); 6319 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_3, 6320 asid); 6321 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM0_GLBL_NON_SECURE_PROPS_4, 6322 asid); 6323 } 6324 6325 if (gaudi->hw_cap_initialized & HW_CAP_NIC9) { 6326 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_0, 6327 asid); 6328 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_1, 6329 asid); 6330 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_2, 6331 asid); 6332 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_3, 6333 asid); 6334 gaudi_mmu_prepare_reg(hdev, mmNIC4_QM1_GLBL_NON_SECURE_PROPS_4, 6335 asid); 6336 } 6337 6338 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid); 6339 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid); 6340 } 6341 6342 static int gaudi_send_job_on_qman0(struct hl_device *hdev, 6343 struct hl_cs_job *job) 6344 { 6345 struct packet_msg_prot *fence_pkt; 6346 u32 *fence_ptr; 6347 dma_addr_t fence_dma_addr; 6348 struct hl_cb *cb; 6349 u32 tmp, timeout, dma_offset; 6350 int rc; 6351 6352 if (hdev->pldm) 6353 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC; 6354 else 6355 timeout = HL_DEVICE_TIMEOUT_USEC; 6356 6357 fence_ptr = hl_asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL, &fence_dma_addr); 6358 if (!fence_ptr) { 6359 dev_err(hdev->dev, 6360 "Failed to allocate fence memory for QMAN0\n"); 6361 return -ENOMEM; 6362 } 6363 6364 cb = job->patched_cb; 6365 6366 fence_pkt = cb->kernel_address + 6367 job->job_cb_size - sizeof(struct packet_msg_prot); 6368 6369 tmp = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_PROT); 6370 tmp |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1); 6371 tmp |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 6372 6373 fence_pkt->ctl = cpu_to_le32(tmp); 6374 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL); 6375 fence_pkt->addr = cpu_to_le64(fence_dma_addr); 6376 6377 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET; 6378 6379 WREG32(mmDMA0_CORE_PROT + dma_offset, 6380 BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT)); 6381 6382 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0, 6383 job->job_cb_size, cb->bus_address); 6384 if (rc) { 6385 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc); 6386 goto free_fence_ptr; 6387 } 6388 6389 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, 6390 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000, 6391 timeout, true); 6392 6393 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0); 6394 6395 if (rc == -ETIMEDOUT) { 6396 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp); 6397 goto free_fence_ptr; 6398 } 6399 6400 free_fence_ptr: 6401 WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT)); 6402 6403 hl_asic_dma_pool_free(hdev, (void *) fence_ptr, fence_dma_addr); 6404 return rc; 6405 } 6406 6407 static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size) 6408 { 6409 if (event_type >= GAUDI_EVENT_SIZE) 6410 goto event_not_supported; 6411 6412 if (!gaudi_irq_map_table[event_type].valid) 6413 goto 
event_not_supported; 6414 6415 snprintf(desc, size, gaudi_irq_map_table[event_type].name); 6416 6417 return; 6418 6419 event_not_supported: 6420 snprintf(desc, size, "N/A"); 6421 } 6422 6423 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y, 6424 bool is_write, u16 *engine_id_1, 6425 u16 *engine_id_2) 6426 { 6427 u32 dma_id[2], dma_offset, err_cause[2], mask, i; 6428 6429 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK : 6430 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK; 6431 6432 switch (x_y) { 6433 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6434 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6435 dma_id[0] = 0; 6436 dma_id[1] = 2; 6437 break; 6438 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6439 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6440 dma_id[0] = 1; 6441 dma_id[1] = 3; 6442 break; 6443 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6444 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6445 dma_id[0] = 4; 6446 dma_id[1] = 6; 6447 break; 6448 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6449 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6450 dma_id[0] = 5; 6451 dma_id[1] = 7; 6452 break; 6453 default: 6454 goto unknown_initiator; 6455 } 6456 6457 for (i = 0 ; i < 2 ; i++) { 6458 dma_offset = dma_id[i] * DMA_CORE_OFFSET; 6459 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset); 6460 } 6461 6462 switch (x_y) { 6463 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6464 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6465 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6466 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6467 return "DMA0"; 6468 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6469 *engine_id_1 = GAUDI_ENGINE_ID_DMA_2; 6470 return "DMA2"; 6471 } else { 6472 *engine_id_1 = GAUDI_ENGINE_ID_DMA_0; 6473 *engine_id_2 = GAUDI_ENGINE_ID_DMA_2; 6474 return "DMA0 or DMA2"; 6475 } 6476 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6477 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6478 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6479 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6480 return "DMA1"; 6481 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6482 *engine_id_1 = GAUDI_ENGINE_ID_DMA_3; 6483 return "DMA3"; 6484 } else { 6485 *engine_id_1 = GAUDI_ENGINE_ID_DMA_1; 6486 *engine_id_2 = GAUDI_ENGINE_ID_DMA_3; 6487 return "DMA1 or DMA3"; 6488 } 6489 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6490 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6491 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6492 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6493 return "DMA4"; 6494 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6495 *engine_id_1 = GAUDI_ENGINE_ID_DMA_6; 6496 return "DMA6"; 6497 } else { 6498 *engine_id_1 = GAUDI_ENGINE_ID_DMA_4; 6499 *engine_id_2 = GAUDI_ENGINE_ID_DMA_6; 6500 return "DMA4 or DMA6"; 6501 } 6502 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6503 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6504 if ((err_cause[0] & mask) && !(err_cause[1] & mask)) { 6505 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6506 return "DMA5"; 6507 } else if (!(err_cause[0] & mask) && (err_cause[1] & mask)) { 6508 *engine_id_1 = GAUDI_ENGINE_ID_DMA_7; 6509 return "DMA7"; 6510 } else { 6511 *engine_id_1 = GAUDI_ENGINE_ID_DMA_5; 6512 *engine_id_2 = GAUDI_ENGINE_ID_DMA_7; 6513 return "DMA5 or DMA7"; 6514 } 6515 } 6516 6517 unknown_initiator: 6518 return "unknown initiator"; 6519 } 6520 6521 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write, 6522 u16 *engine_id_1, u16 *engine_id_2) 6523 { 6524 u32 val, x_y, axi_id; 6525 6526 val = is_write ? 
RREG32(mmMMU_UP_RAZWI_WRITE_ID) : 6527 RREG32(mmMMU_UP_RAZWI_READ_ID); 6528 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) | 6529 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT)); 6530 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK << 6531 RAZWI_INITIATOR_AXI_ID_SHIFT); 6532 6533 switch (x_y) { 6534 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0: 6535 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6536 *engine_id_1 = GAUDI_ENGINE_ID_TPC_0; 6537 return "TPC0"; 6538 } 6539 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6540 *engine_id_1 = GAUDI_ENGINE_ID_NIC_0; 6541 return "NIC0"; 6542 } 6543 break; 6544 case RAZWI_INITIATOR_ID_X_Y_TPC1: 6545 *engine_id_1 = GAUDI_ENGINE_ID_TPC_1; 6546 return "TPC1"; 6547 case RAZWI_INITIATOR_ID_X_Y_MME0_0: 6548 case RAZWI_INITIATOR_ID_X_Y_MME0_1: 6549 *engine_id_1 = GAUDI_ENGINE_ID_MME_0; 6550 return "MME0"; 6551 case RAZWI_INITIATOR_ID_X_Y_MME1_0: 6552 case RAZWI_INITIATOR_ID_X_Y_MME1_1: 6553 *engine_id_1 = GAUDI_ENGINE_ID_MME_1; 6554 return "MME1"; 6555 case RAZWI_INITIATOR_ID_X_Y_TPC2: 6556 *engine_id_1 = GAUDI_ENGINE_ID_TPC_2; 6557 return "TPC2"; 6558 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC: 6559 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6560 *engine_id_1 = GAUDI_ENGINE_ID_TPC_3; 6561 return "TPC3"; 6562 } 6563 /* PCI, CPU or PSOC does not have engine id*/ 6564 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI)) 6565 return "PCI"; 6566 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU)) 6567 return "CPU"; 6568 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC)) 6569 return "PSOC"; 6570 break; 6571 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0: 6572 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1: 6573 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0: 6574 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1: 6575 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0: 6576 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1: 6577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0: 6578 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1: 6579 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write, 6580 engine_id_1, engine_id_2); 6581 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2: 6582 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6583 *engine_id_1 = GAUDI_ENGINE_ID_TPC_4; 6584 return "TPC4"; 6585 } 6586 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6587 *engine_id_1 = GAUDI_ENGINE_ID_NIC_1; 6588 return "NIC1"; 6589 } 6590 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6591 *engine_id_1 = GAUDI_ENGINE_ID_NIC_2; 6592 return "NIC2"; 6593 } 6594 break; 6595 case RAZWI_INITIATOR_ID_X_Y_TPC5: 6596 *engine_id_1 = GAUDI_ENGINE_ID_TPC_5; 6597 return "TPC5"; 6598 case RAZWI_INITIATOR_ID_X_Y_MME2_0: 6599 case RAZWI_INITIATOR_ID_X_Y_MME2_1: 6600 *engine_id_1 = GAUDI_ENGINE_ID_MME_2; 6601 return "MME2"; 6602 case RAZWI_INITIATOR_ID_X_Y_MME3_0: 6603 case RAZWI_INITIATOR_ID_X_Y_MME3_1: 6604 *engine_id_1 = GAUDI_ENGINE_ID_MME_3; 6605 return "MME3"; 6606 case RAZWI_INITIATOR_ID_X_Y_TPC6: 6607 *engine_id_1 = GAUDI_ENGINE_ID_TPC_6; 6608 return "TPC6"; 6609 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5: 6610 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC)) { 6611 *engine_id_1 = GAUDI_ENGINE_ID_TPC_7; 6612 return "TPC7"; 6613 } 6614 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC)) { 6615 *engine_id_1 = GAUDI_ENGINE_ID_NIC_4; 6616 return "NIC4"; 6617 } 6618 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT)) { 6619 *engine_id_1 = GAUDI_ENGINE_ID_NIC_5; 6620 return "NIC5"; 6621 } 6622 break; 6623 default: 6624 break; 6625 } 6626 6627 dev_err(hdev->dev, 6628 
"Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n", 6629 val, 6630 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK, 6631 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK, 6632 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) & 6633 RAZWI_INITIATOR_AXI_ID_MASK); 6634 6635 return "unknown initiator"; 6636 } 6637 6638 static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1, 6639 u16 *engine_id_2, bool *is_read, bool *is_write) 6640 { 6641 6642 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) { 6643 dev_err_ratelimited(hdev->dev, 6644 "RAZWI event caused by illegal write of %s\n", 6645 gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2)); 6646 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0); 6647 *is_write = true; 6648 } 6649 6650 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) { 6651 dev_err_ratelimited(hdev->dev, 6652 "RAZWI event caused by illegal read of %s\n", 6653 gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2)); 6654 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0); 6655 *is_read = true; 6656 } 6657 } 6658 6659 static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask) 6660 { 6661 struct gaudi_device *gaudi = hdev->asic_specific; 6662 u32 val; 6663 6664 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 6665 return; 6666 6667 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE); 6668 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6669 *addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK; 6670 *addr <<= 32; 6671 *addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA); 6672 6673 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr); 6674 hl_handle_page_fault(hdev, *addr, 0, true, event_mask); 6675 6676 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0); 6677 } 6678 6679 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE); 6680 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) { 6681 *addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK; 6682 *addr <<= 32; 6683 *addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA); 6684 6685 dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr); 6686 6687 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0); 6688 } 6689 } 6690 6691 /* 6692 * +-------------------+------------------------------------------------------+ 6693 * | Configuration Reg | Description | 6694 * | Address | | 6695 * +-------------------+------------------------------------------------------+ 6696 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)| 6697 * | |0xF30 memory wrappers 31:0 (MSB to LSB) | 6698 * | |0xF34 memory wrappers 63:32 | 6699 * | |0xF38 memory wrappers 95:64 | 6700 * | |0xF3C memory wrappers 127:96 | 6701 * +-------------------+------------------------------------------------------+ 6702 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)| 6703 * | |0xF40 memory wrappers 31:0 (MSB to LSB) | 6704 * | |0xF44 memory wrappers 63:32 | 6705 * | |0xF48 memory wrappers 95:64 | 6706 * | |0xF4C memory wrappers 127:96 | 6707 * +-------------------+------------------------------------------------------+ 6708 */ 6709 static int gaudi_extract_ecc_info(struct hl_device *hdev, 6710 struct ecc_info_extract_params *params, u64 *ecc_address, 6711 u64 *ecc_syndrom, u8 *memory_wrapper_idx) 6712 { 6713 u32 i, num_mem_regs, reg, err_bit; 6714 u64 err_addr, err_word = 0; 6715 6716 num_mem_regs = params->num_memories / 32 + 6717 ((params->num_memories % 32) ? 
1 : 0); 6718 6719 if (params->block_address >= CFG_BASE) 6720 params->block_address -= CFG_BASE; 6721 6722 if (params->derr) 6723 err_addr = params->block_address + GAUDI_ECC_DERR0_OFFSET; 6724 else 6725 err_addr = params->block_address + GAUDI_ECC_SERR0_OFFSET; 6726 6727 /* Set invalid wrapper index */ 6728 *memory_wrapper_idx = 0xFF; 6729 6730 /* Iterate through memory wrappers, a single bit must be set */ 6731 for (i = 0 ; i < num_mem_regs ; i++) { 6732 err_word = RREG32(err_addr + i * 4); 6733 6734 if (err_word) { 6735 err_bit = __ffs(err_word); 6736 *memory_wrapper_idx = err_bit + (32 * i); 6737 break; 6738 } 6739 } 6740 6741 if (*memory_wrapper_idx == 0xFF) { 6742 dev_err(hdev->dev, "ECC error information cannot be found\n"); 6743 return -EINVAL; 6744 } 6745 6746 WREG32(params->block_address + GAUDI_ECC_MEM_SEL_OFFSET, 6747 *memory_wrapper_idx); 6748 6749 *ecc_address = 6750 RREG32(params->block_address + GAUDI_ECC_ADDRESS_OFFSET); 6751 *ecc_syndrom = 6752 RREG32(params->block_address + GAUDI_ECC_SYNDROME_OFFSET); 6753 6754 /* Clear error indication */ 6755 reg = RREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET); 6756 if (params->derr) 6757 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_DERR_MASK, 1); 6758 else 6759 reg |= FIELD_PREP(GAUDI_ECC_MEM_INFO_CLR_SERR_MASK, 1); 6760 6761 WREG32(params->block_address + GAUDI_ECC_MEM_INFO_CLR_OFFSET, reg); 6762 6763 return 0; 6764 } 6765 6766 /* 6767 * gaudi_queue_idx_dec - decrement queue index (pi/ci) and handle wrap 6768 * 6769 * @idx: the current pi/ci value 6770 * @q_len: the queue length (power of 2) 6771 * 6772 * @return the cyclically decremented index 6773 */ 6774 static inline u32 gaudi_queue_idx_dec(u32 idx, u32 q_len) 6775 { 6776 u32 mask = q_len - 1; 6777 6778 /* 6779 * modular decrement is equivalent to adding (q_len - 1); 6780 * later we take the LSBs to make sure the value is in the 6781 * range [0, q_len - 1] 6782 */ 6783 return (idx + q_len - 1) & mask; 6784 } 6785 6786 /** 6787 * gaudi_handle_sw_config_stream_data - print SW config stream data 6788 * 6789 * @hdev: pointer to the habanalabs device structure 6790 * @stream: the QMAN's stream 6791 * @qman_base: base address of QMAN registers block 6792 * @event_mask: mask of the last events that occurred 6793 */ 6794 static void gaudi_handle_sw_config_stream_data(struct hl_device *hdev, u32 stream, 6795 u64 qman_base, u64 event_mask) 6796 { 6797 u64 cq_ptr_lo, cq_ptr_hi, cq_tsize, cq_ptr; 6798 u32 cq_ptr_lo_off, size; 6799 6800 cq_ptr_lo_off = mmTPC0_QM_CQ_PTR_LO_1 - mmTPC0_QM_CQ_PTR_LO_0; 6801 6802 cq_ptr_lo = qman_base + (mmTPC0_QM_CQ_PTR_LO_0 - mmTPC0_QM_BASE) + 6803 stream * cq_ptr_lo_off; 6804 cq_ptr_hi = cq_ptr_lo + 6805 (mmTPC0_QM_CQ_PTR_HI_0 - mmTPC0_QM_CQ_PTR_LO_0); 6806 cq_tsize = cq_ptr_lo + 6807 (mmTPC0_QM_CQ_TSIZE_0 - mmTPC0_QM_CQ_PTR_LO_0); 6808 6809 cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo); 6810 size = RREG32(cq_tsize); 6811 dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n", 6812 stream, cq_ptr, size); 6813 6814 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6815 hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6816 hdev->captured_err_info.undef_opcode.cq_size = size; 6817 hdev->captured_err_info.undef_opcode.stream_id = stream; 6818 } 6819 } 6820 6821 /** 6822 * gaudi_handle_last_pqes_on_err - print last PQEs on error 6823 * 6824 * @hdev: pointer to the habanalabs device structure 6825 * @qid_base: first QID of the QMAN (out of 4 streams) 6826 * @stream: the QMAN's stream 6827 * @qman_base:
base address of QMAN registers block 6828 * @event_mask: mask of the last events that occurred 6829 * @pr_sw_conf: if true print the SW config stream data (CQ PTR and SIZE) 6830 */ 6831 static void gaudi_handle_last_pqes_on_err(struct hl_device *hdev, u32 qid_base, 6832 u32 stream, u64 qman_base, 6833 u64 event_mask, 6834 bool pr_sw_conf) 6835 { 6836 u32 ci, qm_ci_stream_off, queue_len; 6837 struct hl_hw_queue *q; 6838 u64 pq_ci, addr[PQ_FETCHER_CACHE_SIZE]; 6839 int i; 6840 6841 q = &hdev->kernel_queues[qid_base + stream]; 6842 6843 qm_ci_stream_off = mmTPC0_QM_PQ_CI_1 - mmTPC0_QM_PQ_CI_0; 6844 pq_ci = qman_base + (mmTPC0_QM_PQ_CI_0 - mmTPC0_QM_BASE) + 6845 stream * qm_ci_stream_off; 6846 6847 queue_len = (q->queue_type == QUEUE_TYPE_INT) ? 6848 q->int_queue_len : HL_QUEUE_LENGTH; 6849 6850 hdev->asic_funcs->hw_queues_lock(hdev); 6851 6852 if (pr_sw_conf) 6853 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6854 6855 ci = RREG32(pq_ci); 6856 6857 /* we should start printing from ci - 1 */ 6858 ci = gaudi_queue_idx_dec(ci, queue_len); 6859 memset(addr, 0, sizeof(addr)); 6860 6861 for (i = 0; i < PQ_FETCHER_CACHE_SIZE; i++) { 6862 struct hl_bd *bd; 6863 u32 len; 6864 6865 bd = q->kernel_address; 6866 bd += ci; 6867 6868 len = le32_to_cpu(bd->len); 6869 /* len 0 means uninitialized entry - break */ 6870 if (!len) 6871 break; 6872 6873 addr[i] = le64_to_cpu(bd->ptr); 6874 6875 dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n", 6876 stream, ci, addr[i], len); 6877 6878 /* get previous ci, wrap if needed */ 6879 ci = gaudi_queue_idx_dec(ci, queue_len); 6880 } 6881 6882 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6883 struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6884 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6885 6886 if (arr_idx == 0) { 6887 undef_opcode->timestamp = ktime_get(); 6888 undef_opcode->engine_id = gaudi_queue_id_to_engine_id[qid_base]; 6889 } 6890 6891 memcpy(undef_opcode->cb_addr_streams[arr_idx], addr, sizeof(addr)); 6892 undef_opcode->cb_addr_streams_len++; 6893 } 6894 6895 hdev->asic_funcs->hw_queues_unlock(hdev); 6896 } 6897 6898 /** 6899 * handle_qman_data_on_err - extract QMAN data on error 6900 * 6901 * @hdev: pointer to the habanalabs device structure 6902 * @qid_base: first QID of the QMAN (out of 4 streams) 6903 * @stream: the QMAN's stream 6904 * @qman_base: base address of QMAN registers block 6905 * @event_mask: mask of the last events that occurred 6906 * 6907 * This function attempts to extract as much data as possible on a QMAN error. 6908 * On upper CP print the SW config stream data and last 8 PQEs.
6909 * On lower CP print SW config data and last PQEs of ALL 4 upper CPs 6910 */ 6911 static void handle_qman_data_on_err(struct hl_device *hdev, u32 qid_base, 6912 u32 stream, u64 qman_base, u64 event_mask) 6913 { 6914 u32 i; 6915 6916 if (stream != QMAN_STREAMS) { 6917 gaudi_handle_last_pqes_on_err(hdev, qid_base, stream, 6918 qman_base, event_mask, true); 6919 return; 6920 } 6921 6922 /* handle Lower-CP */ 6923 gaudi_handle_sw_config_stream_data(hdev, stream, qman_base, event_mask); 6924 6925 for (i = 0; i < QMAN_STREAMS; i++) 6926 gaudi_handle_last_pqes_on_err(hdev, qid_base, i, 6927 qman_base, event_mask, false); 6928 } 6929 6930 static void gaudi_handle_qman_err_generic(struct hl_device *hdev, 6931 const char *qm_name, 6932 u64 qman_base, 6933 u32 qid_base, 6934 u64 *event_mask) 6935 { 6936 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val; 6937 u64 glbl_sts_addr, arb_err_addr; 6938 char reg_desc[32]; 6939 6940 glbl_sts_addr = qman_base + (mmTPC0_QM_GLBL_STS1_0 - mmTPC0_QM_BASE); 6941 arb_err_addr = qman_base + (mmTPC0_QM_ARB_ERR_CAUSE - mmTPC0_QM_BASE); 6942 6943 /* Iterate through all stream GLBL_STS1 registers + Lower CP */ 6944 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) { 6945 glbl_sts_clr_val = 0; 6946 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i); 6947 6948 if (!glbl_sts_val) 6949 continue; 6950 6951 if (i == QMAN_STREAMS) 6952 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP"); 6953 else 6954 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i); 6955 6956 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) { 6957 if (glbl_sts_val & BIT(j)) { 6958 dev_err_ratelimited(hdev->dev, 6959 "%s %s. err cause: %s\n", 6960 qm_name, reg_desc, 6961 gaudi_qman_error_cause[j]); 6962 glbl_sts_clr_val |= BIT(j); 6963 } 6964 } 6965 /* check for undefined opcode */ 6966 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 6967 hdev->captured_err_info.undef_opcode.write_enable) { 6968 memset(&hdev->captured_err_info.undef_opcode, 0, 6969 sizeof(hdev->captured_err_info.undef_opcode)); 6970 6971 hdev->captured_err_info.undef_opcode.write_enable = false; 6972 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 6973 } 6974 6975 /* Write 1 clear errors */ 6976 if (!hdev->stop_on_err) 6977 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val); 6978 else 6979 handle_qman_data_on_err(hdev, qid_base, i, qman_base, *event_mask); 6980 } 6981 6982 arb_err_val = RREG32(arb_err_addr); 6983 6984 if (!arb_err_val) 6985 return; 6986 6987 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) { 6988 if (arb_err_val & BIT(j)) { 6989 dev_err_ratelimited(hdev->dev, 6990 "%s ARB_ERR. 
err cause: %s\n", 6991 qm_name, 6992 gaudi_qman_arb_error_cause[j]); 6993 } 6994 } 6995 } 6996 6997 static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type, 6998 struct hl_eq_sm_sei_data *sei_data) 6999 { 7000 u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0; 7001 7002 /* Flip the bits as the enum is ordered in the opposite way */ 7003 index = (index ^ 0x3) & 0x3; 7004 7005 switch (sei_data->sei_cause) { 7006 case SM_SEI_SO_OVERFLOW: 7007 dev_err_ratelimited(hdev->dev, 7008 "%s SEI Error: SOB Group %u overflow/underflow", 7009 gaudi_sync_manager_names[index], 7010 le32_to_cpu(sei_data->sei_log)); 7011 break; 7012 case SM_SEI_LBW_4B_UNALIGNED: 7013 dev_err_ratelimited(hdev->dev, 7014 "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x", 7015 gaudi_sync_manager_names[index], 7016 le32_to_cpu(sei_data->sei_log)); 7017 break; 7018 case SM_SEI_AXI_RESPONSE_ERR: 7019 dev_err_ratelimited(hdev->dev, 7020 "%s SEI Error: AXI ID %u response error", 7021 gaudi_sync_manager_names[index], 7022 le32_to_cpu(sei_data->sei_log)); 7023 break; 7024 default: 7025 dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u", 7026 le32_to_cpu(sei_data->sei_log)); 7027 break; 7028 } 7029 } 7030 7031 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type, 7032 struct hl_eq_ecc_data *ecc_data) 7033 { 7034 struct ecc_info_extract_params params; 7035 u64 ecc_address = 0, ecc_syndrom = 0; 7036 u8 index, memory_wrapper_idx = 0; 7037 bool extract_info_from_fw; 7038 int rc; 7039 7040 if (hdev->asic_prop.fw_security_enabled) { 7041 extract_info_from_fw = true; 7042 goto extract_ecc_info; 7043 } 7044 7045 switch (event_type) { 7046 case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR: 7047 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR: 7048 extract_info_from_fw = true; 7049 break; 7050 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR: 7051 index = event_type - GAUDI_EVENT_TPC0_SERR; 7052 params.block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7053 params.num_memories = 90; 7054 params.derr = false; 7055 extract_info_from_fw = false; 7056 break; 7057 case GAUDI_EVENT_TPC0_DERR ... 
GAUDI_EVENT_TPC7_DERR: 7058 index = event_type - GAUDI_EVENT_TPC0_DERR; 7059 params.block_address = 7060 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET; 7061 params.num_memories = 90; 7062 params.derr = true; 7063 extract_info_from_fw = false; 7064 break; 7065 case GAUDI_EVENT_MME0_ACC_SERR: 7066 case GAUDI_EVENT_MME1_ACC_SERR: 7067 case GAUDI_EVENT_MME2_ACC_SERR: 7068 case GAUDI_EVENT_MME3_ACC_SERR: 7069 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4; 7070 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7071 params.num_memories = 128; 7072 params.derr = false; 7073 extract_info_from_fw = false; 7074 break; 7075 case GAUDI_EVENT_MME0_ACC_DERR: 7076 case GAUDI_EVENT_MME1_ACC_DERR: 7077 case GAUDI_EVENT_MME2_ACC_DERR: 7078 case GAUDI_EVENT_MME3_ACC_DERR: 7079 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4; 7080 params.block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET; 7081 params.num_memories = 128; 7082 params.derr = true; 7083 extract_info_from_fw = false; 7084 break; 7085 case GAUDI_EVENT_MME0_SBAB_SERR: 7086 case GAUDI_EVENT_MME1_SBAB_SERR: 7087 case GAUDI_EVENT_MME2_SBAB_SERR: 7088 case GAUDI_EVENT_MME3_SBAB_SERR: 7089 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4; 7090 params.block_address = 7091 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7092 params.num_memories = 33; 7093 params.derr = false; 7094 extract_info_from_fw = false; 7095 break; 7096 case GAUDI_EVENT_MME0_SBAB_DERR: 7097 case GAUDI_EVENT_MME1_SBAB_DERR: 7098 case GAUDI_EVENT_MME2_SBAB_DERR: 7099 case GAUDI_EVENT_MME3_SBAB_DERR: 7100 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4; 7101 params.block_address = 7102 mmMME0_SBAB_BASE + index * MME_ACC_OFFSET; 7103 params.num_memories = 33; 7104 params.derr = true; 7105 extract_info_from_fw = false; 7106 break; 7107 default: 7108 return; 7109 } 7110 7111 extract_ecc_info: 7112 if (extract_info_from_fw) { 7113 ecc_address = le64_to_cpu(ecc_data->ecc_address); 7114 ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom); 7115 memory_wrapper_idx = ecc_data->memory_wrapper_idx; 7116 } else { 7117 rc = gaudi_extract_ecc_info(hdev, ¶ms, &ecc_address, 7118 &ecc_syndrom, &memory_wrapper_idx); 7119 if (rc) 7120 return; 7121 } 7122 7123 dev_err(hdev->dev, 7124 "ECC error detected. address: %#llx. Syndrom: %#llx. block id %u\n", 7125 ecc_address, ecc_syndrom, memory_wrapper_idx); 7126 } 7127 7128 static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7129 { 7130 u64 qman_base; 7131 char desc[32]; 7132 u32 qid_base; 7133 u8 index; 7134 7135 switch (event_type) { 7136 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7137 index = event_type - GAUDI_EVENT_TPC0_QM; 7138 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7139 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; 7140 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index); 7141 break; 7142 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7143 if (event_type == GAUDI_EVENT_MME0_QM) { 7144 index = 0; 7145 qid_base = GAUDI_QUEUE_ID_MME_0_0; 7146 } else { /* event_type == GAUDI_EVENT_MME2_QM */ 7147 index = 2; 7148 qid_base = GAUDI_QUEUE_ID_MME_1_0; 7149 } 7150 qman_base = mmMME0_QM_BASE + index * MME_QMAN_OFFSET; 7151 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index); 7152 break; 7153 case GAUDI_EVENT_DMA0_QM ... 
GAUDI_EVENT_DMA7_QM: 7154 index = event_type - GAUDI_EVENT_DMA0_QM; 7155 qid_base = GAUDI_QUEUE_ID_DMA_0_0 + index * QMAN_STREAMS; 7156 /* skip GAUDI_QUEUE_ID_CPU_PQ if necessary */ 7157 if (index > 1) 7158 qid_base++; 7159 qman_base = mmDMA0_QM_BASE + index * DMA_QMAN_OFFSET; 7160 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index); 7161 break; 7162 case GAUDI_EVENT_NIC0_QM0: 7163 qid_base = GAUDI_QUEUE_ID_NIC_0_0; 7164 qman_base = mmNIC0_QM0_BASE; 7165 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM0"); 7166 break; 7167 case GAUDI_EVENT_NIC0_QM1: 7168 qid_base = GAUDI_QUEUE_ID_NIC_1_0; 7169 qman_base = mmNIC0_QM1_BASE; 7170 snprintf(desc, ARRAY_SIZE(desc), "NIC0_QM1"); 7171 break; 7172 case GAUDI_EVENT_NIC1_QM0: 7173 qid_base = GAUDI_QUEUE_ID_NIC_2_0; 7174 qman_base = mmNIC1_QM0_BASE; 7175 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM0"); 7176 break; 7177 case GAUDI_EVENT_NIC1_QM1: 7178 qid_base = GAUDI_QUEUE_ID_NIC_3_0; 7179 qman_base = mmNIC1_QM1_BASE; 7180 snprintf(desc, ARRAY_SIZE(desc), "NIC1_QM1"); 7181 break; 7182 case GAUDI_EVENT_NIC2_QM0: 7183 qid_base = GAUDI_QUEUE_ID_NIC_4_0; 7184 qman_base = mmNIC2_QM0_BASE; 7185 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM0"); 7186 break; 7187 case GAUDI_EVENT_NIC2_QM1: 7188 qid_base = GAUDI_QUEUE_ID_NIC_5_0; 7189 qman_base = mmNIC2_QM1_BASE; 7190 snprintf(desc, ARRAY_SIZE(desc), "NIC2_QM1"); 7191 break; 7192 case GAUDI_EVENT_NIC3_QM0: 7193 qid_base = GAUDI_QUEUE_ID_NIC_6_0; 7194 qman_base = mmNIC3_QM0_BASE; 7195 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM0"); 7196 break; 7197 case GAUDI_EVENT_NIC3_QM1: 7198 qid_base = GAUDI_QUEUE_ID_NIC_7_0; 7199 qman_base = mmNIC3_QM1_BASE; 7200 snprintf(desc, ARRAY_SIZE(desc), "NIC3_QM1"); 7201 break; 7202 case GAUDI_EVENT_NIC4_QM0: 7203 qid_base = GAUDI_QUEUE_ID_NIC_8_0; 7204 qman_base = mmNIC4_QM0_BASE; 7205 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM0"); 7206 break; 7207 case GAUDI_EVENT_NIC4_QM1: 7208 qid_base = GAUDI_QUEUE_ID_NIC_9_0; 7209 qman_base = mmNIC4_QM1_BASE; 7210 snprintf(desc, ARRAY_SIZE(desc), "NIC4_QM1"); 7211 break; 7212 default: 7213 return; 7214 } 7215 7216 gaudi_handle_qman_err_generic(hdev, desc, qman_base, qid_base, event_mask); 7217 } 7218 7219 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, 7220 bool check_razwi, u64 *event_mask) 7221 { 7222 bool is_read = false, is_write = false; 7223 u16 engine_id[2], num_of_razwi_eng = 0; 7224 char desc[64] = ""; 7225 u64 razwi_addr = 0; 7226 u8 razwi_flags = 0; 7227 7228 /* 7229 * Init engine id by default as not valid and only if razwi initiated from engine with 7230 * engine id it will get valid value. 
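* If the RAZWI was initiated by an engine, gaudi_print_and_get_razwi_info() below is expected to overwrite these entries with the real engine IDs.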
7231 */ 7232 engine_id[0] = HL_RAZWI_NA_ENG_ID; 7233 engine_id[1] = HL_RAZWI_NA_ENG_ID; 7234 7235 gaudi_get_event_desc(event_type, desc, sizeof(desc)); 7236 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7237 event_type, desc); 7238 7239 if (check_razwi) { 7240 gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read, 7241 &is_write); 7242 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask); 7243 7244 if (is_read) 7245 razwi_flags |= HL_RAZWI_READ; 7246 if (is_write) 7247 razwi_flags |= HL_RAZWI_WRITE; 7248 7249 if (engine_id[0] != HL_RAZWI_NA_ENG_ID) { 7250 if (engine_id[1] != HL_RAZWI_NA_ENG_ID) 7251 num_of_razwi_eng = 2; 7252 else 7253 num_of_razwi_eng = 1; 7254 } 7255 7256 if (razwi_flags) 7257 hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, 7258 razwi_flags, event_mask); 7259 } 7260 } 7261 7262 static void gaudi_print_out_of_sync_info(struct hl_device *hdev, 7263 struct cpucp_pkt_sync_err *sync_err) 7264 { 7265 struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ]; 7266 7267 dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n", 7268 le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci)); 7269 } 7270 7271 static void gaudi_print_fw_alive_info(struct hl_device *hdev, 7272 struct hl_eq_fw_alive *fw_alive) 7273 { 7274 dev_err(hdev->dev, 7275 "FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n", 7276 (fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical", 7277 le32_to_cpu(fw_alive->process_id), 7278 le32_to_cpu(fw_alive->thread_id), 7279 le64_to_cpu(fw_alive->uptime_seconds)); 7280 } 7281 7282 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type, 7283 void *data) 7284 { 7285 char desc[64] = "", *type; 7286 struct eq_nic_sei_event *eq_nic_sei = data; 7287 u16 nic_id = event_type - GAUDI_EVENT_NIC_SEI_0; 7288 7289 switch (eq_nic_sei->axi_error_cause) { 7290 case RXB: 7291 type = "RXB"; 7292 break; 7293 case RXE: 7294 type = "RXE"; 7295 break; 7296 case TXS: 7297 type = "TXS"; 7298 break; 7299 case TXE: 7300 type = "TXE"; 7301 break; 7302 case QPC_RESP: 7303 type = "QPC_RESP"; 7304 break; 7305 case NON_AXI_ERR: 7306 type = "NON_AXI_ERR"; 7307 break; 7308 case TMR: 7309 type = "TMR"; 7310 break; 7311 default: 7312 dev_err(hdev->dev, "unknown NIC AXI cause %d\n", 7313 eq_nic_sei->axi_error_cause); 7314 type = "N/A"; 7315 break; 7316 } 7317 7318 snprintf(desc, sizeof(desc), "NIC%d_%s%d", nic_id, type, 7319 eq_nic_sei->id); 7320 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n", 7321 event_type, desc); 7322 } 7323 7324 static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7325 { 7326 /* GAUDI doesn't support any reset except hard-reset */ 7327 return -EPERM; 7328 } 7329 7330 static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device, 7331 struct hl_eq_hbm_ecc_data *hbm_ecc_data) 7332 { 7333 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7334 int rc = 0; 7335 7336 if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 & 7337 CPU_BOOT_DEV_STS0_HBM_ECC_EN) { 7338 if (!hbm_ecc_data) { 7339 dev_err(hdev->dev, "No FW ECC data"); 7340 return 0; 7341 } 7342 7343 wr_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_WR_PAR_MASK, 7344 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7345 rd_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_RD_PAR_MASK, 7346 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7347 ca_par = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_CA_PAR_MASK, 7348 
le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7349 derr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_DERR_MASK, 7350 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7351 serr = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_SERR_MASK, 7352 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7353 type = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_TYPE_MASK, 7354 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7355 ch = FIELD_GET(CPUCP_PKT_HBM_ECC_INFO_HBM_CH_MASK, 7356 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7357 7358 dev_err(hdev->dev, 7359 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7360 device, ch, wr_par, rd_par, ca_par, serr, derr); 7361 dev_err(hdev->dev, 7362 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7363 device, ch, hbm_ecc_data->first_addr, type, 7364 hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7365 hbm_ecc_data->dec_cnt); 7366 return 0; 7367 } 7368 7369 if (hdev->asic_prop.fw_security_enabled) { 7370 dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7371 return 0; 7372 } 7373 7374 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET; 7375 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) { 7376 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF); 7377 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7378 if (val) { 7379 rc = -EIO; 7380 dev_err(hdev->dev, 7381 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7382 device, ch * 2, val & 0x1, (val >> 1) & 0x1, 7383 (val >> 2) & 0x1, (val >> 3) & 0x1, 7384 (val >> 4) & 0x1); 7385 7386 val2 = RREG32(base + ch * 0x1000 + 0x060); 7387 dev_err(hdev->dev, 7388 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7389 device, ch * 2, 7390 RREG32(base + ch * 0x1000 + 0x064), 7391 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7392 (val2 & 0xFF0000) >> 16, 7393 (val2 & 0xFF000000) >> 24); 7394 } 7395 7396 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF); 7397 val = (val & 0xFF) | ((val >> 8) & 0xFF); 7398 if (val) { 7399 rc = -EIO; 7400 dev_err(hdev->dev, 7401 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7402 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1, 7403 (val >> 2) & 0x1, (val >> 3) & 0x1, 7404 (val >> 4) & 0x1); 7405 7406 val2 = RREG32(base + ch * 0x1000 + 0x070); 7407 dev_err(hdev->dev, 7408 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DEC_CNT=%d\n", 7409 device, ch * 2 + 1, 7410 RREG32(base + ch * 0x1000 + 0x074), 7411 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10, 7412 (val2 & 0xFF0000) >> 16, 7413 (val2 & 0xFF000000) >> 24); 7414 } 7415 7416 /* Clear interrupts */ 7417 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF); 7418 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF); 7419 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F); 7420 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F); 7421 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF); 7422 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF); 7423 } 7424 7425 val = RREG32(base + 0x8F30); 7426 val2 = RREG32(base + 0x8F34); 7427 if (val | val2) { 7428 rc = -EIO; 7429 dev_err(hdev->dev, 7430 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n", 7431 device, val, val2); 7432 } 7433 val = RREG32(base + 0x8F40); 7434 val2 = RREG32(base + 0x8F44); 7435 if (val | val2) { 7436 rc = -EIO; 7437 dev_err(hdev->dev, 7438 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n", 7439 device, val, val2); 7440 } 7441 7442 return rc; 7443 } 
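/*
 * Illustrative sketch, not used by the driver flow above: gaudi_hbm_read_interrupts()
 * folds the two bytes of each 16-bit per-pseudo-channel interrupt word and then
 * reports bits 0..4 as WR_PAR/RD_PAR/CA_PAR/SERR/DERR. The helper and the
 * GAUDI_HBM_SKETCH_* names below only restate that decode for readability; they
 * are invented here and are not real register definitions. Relies only on kernel
 * helpers already available in this file (u32, BIT()).
 */
#define GAUDI_HBM_SKETCH_WR_PAR	BIT(0)
#define GAUDI_HBM_SKETCH_RD_PAR	BIT(1)
#define GAUDI_HBM_SKETCH_CA_PAR	BIT(2)
#define GAUDI_HBM_SKETCH_SERR	BIT(3)
#define GAUDI_HBM_SKETCH_DERR	BIT(4)

static inline u32 gaudi_hbm_fold_irq_word(u32 irq_info)
{
	/* OR the high byte into the low byte, exactly as done above */
	return (irq_info & 0xFF) | ((irq_info >> 8) & 0xFF);
}

/*
 * e.g. (gaudi_hbm_fold_irq_word(val) & GAUDI_HBM_SKETCH_SERR) corresponds to
 * the "(val >> 3) & 0x1" term printed as SERR above.
 */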
7444 7445 static int gaudi_hbm_event_to_dev(u16 hbm_event_type) 7446 { 7447 switch (hbm_event_type) { 7448 case GAUDI_EVENT_HBM0_SPI_0: 7449 case GAUDI_EVENT_HBM0_SPI_1: 7450 return 0; 7451 case GAUDI_EVENT_HBM1_SPI_0: 7452 case GAUDI_EVENT_HBM1_SPI_1: 7453 return 1; 7454 case GAUDI_EVENT_HBM2_SPI_0: 7455 case GAUDI_EVENT_HBM2_SPI_1: 7456 return 2; 7457 case GAUDI_EVENT_HBM3_SPI_0: 7458 case GAUDI_EVENT_HBM3_SPI_1: 7459 return 3; 7460 default: 7461 break; 7462 } 7463 7464 /* Should never happen */ 7465 return 0; 7466 } 7467 7468 static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id, 7469 char *interrupt_name) 7470 { 7471 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i; 7472 bool soft_reset_required = false; 7473 7474 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) & 7475 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK; 7476 7477 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++) 7478 if (tpc_interrupts_cause & BIT(i)) { 7479 dev_err_ratelimited(hdev->dev, 7480 "TPC%d_%s interrupt cause: %s\n", 7481 tpc_id, interrupt_name, 7482 gaudi_tpc_interrupts_cause[i]); 7483 /* If this is QM error, we need to soft-reset */ 7484 if (i == 15) 7485 soft_reset_required = true; 7486 } 7487 7488 /* Clear interrupts */ 7489 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0); 7490 7491 return soft_reset_required; 7492 } 7493 7494 static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type) 7495 { 7496 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1; 7497 } 7498 7499 static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type) 7500 { 7501 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6; 7502 } 7503 7504 static void gaudi_print_clk_change_info(struct hl_device *hdev, u16 event_type, u64 *event_mask) 7505 { 7506 ktime_t zero_time = ktime_set(0, 0); 7507 7508 mutex_lock(&hdev->clk_throttling.lock); 7509 7510 switch (event_type) { 7511 case GAUDI_EVENT_FIX_POWER_ENV_S: 7512 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_POWER; 7513 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_POWER; 7514 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].start = ktime_get(); 7515 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = zero_time; 7516 dev_info_ratelimited(hdev->dev, 7517 "Clock throttling due to power consumption\n"); 7518 break; 7519 7520 case GAUDI_EVENT_FIX_POWER_ENV_E: 7521 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_POWER; 7522 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_POWER].end = ktime_get(); 7523 dev_info_ratelimited(hdev->dev, 7524 "Power envelop is safe, back to optimal clock\n"); 7525 break; 7526 7527 case GAUDI_EVENT_FIX_THERMAL_ENV_S: 7528 hdev->clk_throttling.current_reason |= HL_CLK_THROTTLE_THERMAL; 7529 hdev->clk_throttling.aggregated_reason |= HL_CLK_THROTTLE_THERMAL; 7530 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].start = ktime_get(); 7531 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = zero_time; 7532 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7533 dev_info_ratelimited(hdev->dev, 7534 "Clock throttling due to overheating\n"); 7535 break; 7536 7537 case GAUDI_EVENT_FIX_THERMAL_ENV_E: 7538 hdev->clk_throttling.current_reason &= ~HL_CLK_THROTTLE_THERMAL; 7539 hdev->clk_throttling.timestamp[HL_CLK_THROTTLE_TYPE_THERMAL].end = ktime_get(); 7540 *event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7541 dev_info_ratelimited(hdev->dev, 7542 "Thermal envelop is safe, back to optimal clock\n"); 7543 break; 7544 7545 default: 7546 
dev_err(hdev->dev, "Received invalid clock change event %d\n", 7547 event_type); 7548 break; 7549 } 7550 7551 mutex_unlock(&hdev->clk_throttling.lock); 7552 } 7553 7554 static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry) 7555 { 7556 struct gaudi_device *gaudi = hdev->asic_specific; 7557 struct hl_info_fw_err_info fw_err_info; 7558 u64 data = le64_to_cpu(eq_entry->data[0]), event_mask = 0; 7559 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl); 7560 u32 fw_fatal_err_flag = 0, flags = 0; 7561 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK) 7562 >> EQ_CTL_EVENT_TYPE_SHIFT); 7563 bool reset_required, reset_direct = false; 7564 u8 cause; 7565 int rc; 7566 7567 if (event_type >= GAUDI_EVENT_SIZE) { 7568 dev_err(hdev->dev, "Event type %u exceeds maximum of %u", 7569 event_type, GAUDI_EVENT_SIZE - 1); 7570 return; 7571 } 7572 7573 gaudi->events_stat[event_type]++; 7574 gaudi->events_stat_aggregate[event_type]++; 7575 7576 switch (event_type) { 7577 case GAUDI_EVENT_PCIE_CORE_DERR: 7578 case GAUDI_EVENT_PCIE_IF_DERR: 7579 case GAUDI_EVENT_PCIE_PHY_DERR: 7580 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR: 7581 case GAUDI_EVENT_MME0_ACC_DERR: 7582 case GAUDI_EVENT_MME0_SBAB_DERR: 7583 case GAUDI_EVENT_MME1_ACC_DERR: 7584 case GAUDI_EVENT_MME1_SBAB_DERR: 7585 case GAUDI_EVENT_MME2_ACC_DERR: 7586 case GAUDI_EVENT_MME2_SBAB_DERR: 7587 case GAUDI_EVENT_MME3_ACC_DERR: 7588 case GAUDI_EVENT_MME3_SBAB_DERR: 7589 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC: 7590 fallthrough; 7591 case GAUDI_EVENT_CPU_IF_ECC_DERR: 7592 case GAUDI_EVENT_PSOC_MEM_DERR: 7593 case GAUDI_EVENT_PSOC_CORESIGHT_DERR: 7594 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR: 7595 case GAUDI_EVENT_NIC0_DERR ... GAUDI_EVENT_NIC4_DERR: 7596 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR: 7597 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR: 7598 case GAUDI_EVENT_MMU_DERR: 7599 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7600 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7601 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7602 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7603 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7604 goto reset_device; 7605 7606 case GAUDI_EVENT_GIC500: 7607 case GAUDI_EVENT_AXI_ECC: 7608 case GAUDI_EVENT_L2_RAM_ECC: 7609 case GAUDI_EVENT_PLL0 ... 
GAUDI_EVENT_PLL17: 7610 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7611 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7612 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7613 goto reset_device; 7614 7615 case GAUDI_EVENT_HBM0_SPI_0: 7616 case GAUDI_EVENT_HBM1_SPI_0: 7617 case GAUDI_EVENT_HBM2_SPI_0: 7618 case GAUDI_EVENT_HBM3_SPI_0: 7619 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7620 gaudi_hbm_read_interrupts(hdev, 7621 gaudi_hbm_event_to_dev(event_type), 7622 &eq_entry->hbm_ecc_data); 7623 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7624 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7625 goto reset_device; 7626 7627 case GAUDI_EVENT_HBM0_SPI_1: 7628 case GAUDI_EVENT_HBM1_SPI_1: 7629 case GAUDI_EVENT_HBM2_SPI_1: 7630 case GAUDI_EVENT_HBM3_SPI_1: 7631 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7632 gaudi_hbm_read_interrupts(hdev, 7633 gaudi_hbm_event_to_dev(event_type), 7634 &eq_entry->hbm_ecc_data); 7635 hl_fw_unmask_irq(hdev, event_type); 7636 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7637 break; 7638 7639 case GAUDI_EVENT_TPC0_DEC: 7640 case GAUDI_EVENT_TPC1_DEC: 7641 case GAUDI_EVENT_TPC2_DEC: 7642 case GAUDI_EVENT_TPC3_DEC: 7643 case GAUDI_EVENT_TPC4_DEC: 7644 case GAUDI_EVENT_TPC5_DEC: 7645 case GAUDI_EVENT_TPC6_DEC: 7646 case GAUDI_EVENT_TPC7_DEC: 7647 /* In TPC DEC event, notify on TPC assertion. While there isn't 7648 * a specific event for assertion yet, the FW generates TPC DEC event. 7649 * The SW upper layer will inspect an internal mapped area to indicate 7650 * if the event is a TPC Assertion or a "real" TPC DEC. 7651 */ 7652 event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7653 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7654 reset_required = gaudi_tpc_read_interrupts(hdev, 7655 tpc_dec_event_to_tpc_id(event_type), 7656 "AXI_SLV_DEC_Error"); 7657 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7658 if (reset_required) { 7659 dev_err(hdev->dev, "reset required due to %s\n", 7660 gaudi_irq_map_table[event_type].name); 7661 7662 reset_direct = true; 7663 goto reset_device; 7664 } else { 7665 hl_fw_unmask_irq(hdev, event_type); 7666 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7667 } 7668 break; 7669 7670 case GAUDI_EVENT_TPC0_KRN_ERR: 7671 case GAUDI_EVENT_TPC1_KRN_ERR: 7672 case GAUDI_EVENT_TPC2_KRN_ERR: 7673 case GAUDI_EVENT_TPC3_KRN_ERR: 7674 case GAUDI_EVENT_TPC4_KRN_ERR: 7675 case GAUDI_EVENT_TPC5_KRN_ERR: 7676 case GAUDI_EVENT_TPC6_KRN_ERR: 7677 case GAUDI_EVENT_TPC7_KRN_ERR: 7678 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7679 reset_required = gaudi_tpc_read_interrupts(hdev, 7680 tpc_krn_event_to_tpc_id(event_type), 7681 "KRN_ERR"); 7682 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7683 if (reset_required) { 7684 dev_err(hdev->dev, "reset required due to %s\n", 7685 gaudi_irq_map_table[event_type].name); 7686 7687 reset_direct = true; 7688 goto reset_device; 7689 } else { 7690 hl_fw_unmask_irq(hdev, event_type); 7691 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7692 } 7693 break; 7694 7695 case GAUDI_EVENT_PCIE_CORE_SERR: 7696 case GAUDI_EVENT_PCIE_IF_SERR: 7697 case GAUDI_EVENT_PCIE_PHY_SERR: 7698 case GAUDI_EVENT_TPC0_SERR ... 
GAUDI_EVENT_TPC7_SERR: 7699 case GAUDI_EVENT_MME0_ACC_SERR: 7700 case GAUDI_EVENT_MME0_SBAB_SERR: 7701 case GAUDI_EVENT_MME1_ACC_SERR: 7702 case GAUDI_EVENT_MME1_SBAB_SERR: 7703 case GAUDI_EVENT_MME2_ACC_SERR: 7704 case GAUDI_EVENT_MME2_SBAB_SERR: 7705 case GAUDI_EVENT_MME3_ACC_SERR: 7706 case GAUDI_EVENT_MME3_SBAB_SERR: 7707 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC: 7708 case GAUDI_EVENT_CPU_IF_ECC_SERR: 7709 case GAUDI_EVENT_PSOC_MEM_SERR: 7710 case GAUDI_EVENT_PSOC_CORESIGHT_SERR: 7711 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR: 7712 case GAUDI_EVENT_NIC0_SERR ... GAUDI_EVENT_NIC4_SERR: 7713 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR: 7714 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR: 7715 fallthrough; 7716 case GAUDI_EVENT_MMU_SERR: 7717 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7718 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7719 hl_fw_unmask_irq(hdev, event_type); 7720 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7721 break; 7722 7723 case GAUDI_EVENT_PCIE_DEC: 7724 case GAUDI_EVENT_CPU_AXI_SPLITTER: 7725 case GAUDI_EVENT_PSOC_AXI_DEC: 7726 case GAUDI_EVENT_PSOC_PRSTN_FALL: 7727 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7728 hl_fw_unmask_irq(hdev, event_type); 7729 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7730 break; 7731 7732 case GAUDI_EVENT_MMU_PAGE_FAULT: 7733 case GAUDI_EVENT_MMU_WR_PERM: 7734 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7735 hl_fw_unmask_irq(hdev, event_type); 7736 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7737 break; 7738 7739 case GAUDI_EVENT_MME0_WBC_RSP: 7740 case GAUDI_EVENT_MME0_SBAB0_RSP: 7741 case GAUDI_EVENT_MME1_WBC_RSP: 7742 case GAUDI_EVENT_MME1_SBAB0_RSP: 7743 case GAUDI_EVENT_MME2_WBC_RSP: 7744 case GAUDI_EVENT_MME2_SBAB0_RSP: 7745 case GAUDI_EVENT_MME3_WBC_RSP: 7746 case GAUDI_EVENT_MME3_SBAB0_RSP: 7747 case GAUDI_EVENT_RAZWI_OR_ADC: 7748 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7749 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: 7750 fallthrough; 7751 case GAUDI_EVENT_NIC0_QM0: 7752 case GAUDI_EVENT_NIC0_QM1: 7753 case GAUDI_EVENT_NIC1_QM0: 7754 case GAUDI_EVENT_NIC1_QM1: 7755 case GAUDI_EVENT_NIC2_QM0: 7756 case GAUDI_EVENT_NIC2_QM1: 7757 case GAUDI_EVENT_NIC3_QM0: 7758 case GAUDI_EVENT_NIC3_QM1: 7759 case GAUDI_EVENT_NIC4_QM0: 7760 case GAUDI_EVENT_NIC4_QM1: 7761 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE: 7762 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7763 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7764 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7765 hl_fw_unmask_irq(hdev, event_type); 7766 event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7767 break; 7768 7769 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7770 gaudi_print_irq_info(hdev, event_type, true, &event_mask); 7771 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7772 goto reset_device; 7773 7774 case GAUDI_EVENT_TPC0_BMON_SPMU: 7775 case GAUDI_EVENT_TPC1_BMON_SPMU: 7776 case GAUDI_EVENT_TPC2_BMON_SPMU: 7777 case GAUDI_EVENT_TPC3_BMON_SPMU: 7778 case GAUDI_EVENT_TPC4_BMON_SPMU: 7779 case GAUDI_EVENT_TPC5_BMON_SPMU: 7780 case GAUDI_EVENT_TPC6_BMON_SPMU: 7781 case GAUDI_EVENT_TPC7_BMON_SPMU: 7782 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7783 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7784 hl_fw_unmask_irq(hdev, event_type); 7785 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7786 break; 7787 7788 case GAUDI_EVENT_NIC_SEI_0 ... 
GAUDI_EVENT_NIC_SEI_4: 7789 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7790 hl_fw_unmask_irq(hdev, event_type); 7791 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7792 break; 7793 7794 case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3: 7795 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7796 gaudi_print_sm_sei_info(hdev, event_type, 7797 &eq_entry->sm_sei_data); 7798 rc = hl_state_dump(hdev); 7799 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7800 if (rc) 7801 dev_err(hdev->dev, 7802 "Error during system state dump %d\n", rc); 7803 hl_fw_unmask_irq(hdev, event_type); 7804 break; 7805 7806 case GAUDI_EVENT_STATUS_NIC0_ENG0 ... GAUDI_EVENT_STATUS_NIC4_ENG1: 7807 break; 7808 7809 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7810 gaudi_print_clk_change_info(hdev, event_type, &event_mask); 7811 hl_fw_unmask_irq(hdev, event_type); 7812 break; 7813 7814 case GAUDI_EVENT_PSOC_GPIO_U16_0: 7815 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF; 7816 dev_err(hdev->dev, 7817 "Received high temp H/W interrupt %d (cause %d)\n", 7818 event_type, cause); 7819 event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7820 break; 7821 7822 case GAUDI_EVENT_DEV_RESET_REQ: 7823 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7824 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7825 goto reset_device; 7826 7827 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7828 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7829 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7830 event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7831 goto reset_device; 7832 7833 case GAUDI_EVENT_FW_ALIVE_S: 7834 gaudi_print_irq_info(hdev, event_type, false, &event_mask); 7835 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7836 fw_err_info.err_type = HL_INFO_FW_REPORTED_ERR; 7837 fw_err_info.event_id = event_type; 7838 fw_err_info.event_mask = &event_mask; 7839 hl_handle_fw_err(hdev, &fw_err_info); 7840 goto reset_device; 7841 7842 default: 7843 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n", 7844 event_type); 7845 break; 7846 } 7847 7848 if (event_mask) 7849 hl_notifier_event_send_all(hdev, event_mask); 7850 7851 return; 7852 7853 reset_device: 7854 reset_required = true; 7855 7856 if (hdev->asic_prop.fw_security_enabled && !reset_direct) { 7857 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW | fw_fatal_err_flag; 7858 7859 /* notify on device unavailable while the reset triggered by fw */ 7860 event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET | 7861 HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE); 7862 } else if (hdev->hard_reset_on_fw_events) { 7863 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY | fw_fatal_err_flag; 7864 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7865 } else { 7866 reset_required = false; 7867 } 7868 7869 if (reset_required) { 7870 /* escalate general hw errors to critical/fatal error */ 7871 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 7872 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 7873 7874 hl_device_cond_reset(hdev, flags, event_mask); 7875 } else { 7876 hl_fw_unmask_irq(hdev, event_type); 7877 /* Notification on occurred event needs to be sent although reset is not executed */ 7878 if (event_mask) 7879 hl_notifier_event_send_all(hdev, event_mask); 7880 } 7881 } 7882 7883 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size) 7884 { 7885 struct gaudi_device *gaudi = hdev->asic_specific; 7886 7887 if (aggregate) { 7888 *size = (u32) sizeof(gaudi->events_stat_aggregate); 7889 
return gaudi->events_stat_aggregate; 7890 } 7891 7892 *size = (u32) sizeof(gaudi->events_stat); 7893 return gaudi->events_stat; 7894 } 7895 7896 static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags) 7897 { 7898 struct gaudi_device *gaudi = hdev->asic_specific; 7899 u32 status, timeout_usec; 7900 int rc; 7901 7902 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) || 7903 hdev->reset_info.hard_reset_pending) 7904 return 0; 7905 7906 if (hdev->pldm) 7907 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7908 else 7909 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7910 7911 /* L0 & L1 invalidation */ 7912 WREG32(mmSTLB_INV_PS, 3); 7913 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++); 7914 WREG32(mmSTLB_INV_PS, 2); 7915 7916 rc = hl_poll_timeout( 7917 hdev, 7918 mmSTLB_INV_PS, 7919 status, 7920 !status, 7921 1000, 7922 timeout_usec); 7923 7924 WREG32(mmSTLB_INV_SET, 0); 7925 7926 return rc; 7927 } 7928 7929 static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev, 7930 bool is_hard, u32 flags, 7931 u32 asid, u64 va, u64 size) 7932 { 7933 /* Treat as invalidate all because there is no range invalidation 7934 * in Gaudi 7935 */ 7936 return hdev->asic_funcs->mmu_invalidate_cache(hdev, is_hard, flags); 7937 } 7938 7939 static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid, u64 phys_addr) 7940 { 7941 u32 status, timeout_usec; 7942 int rc; 7943 7944 if (hdev->pldm) 7945 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC; 7946 else 7947 timeout_usec = MMU_CONFIG_TIMEOUT_USEC; 7948 7949 WREG32(MMU_ASID, asid); 7950 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT); 7951 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT); 7952 WREG32(MMU_BUSY, 0x80000000); 7953 7954 rc = hl_poll_timeout( 7955 hdev, 7956 MMU_BUSY, 7957 status, 7958 !(status & 0x80000000), 7959 1000, 7960 timeout_usec); 7961 7962 if (rc) { 7963 dev_err(hdev->dev, 7964 "Timeout during MMU hop0 config of asid %d\n", asid); 7965 return rc; 7966 } 7967 7968 return 0; 7969 } 7970 7971 static int gaudi_send_heartbeat(struct hl_device *hdev) 7972 { 7973 struct gaudi_device *gaudi = hdev->asic_specific; 7974 7975 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7976 return 0; 7977 7978 return hl_fw_send_heartbeat(hdev); 7979 } 7980 7981 static int gaudi_cpucp_info_get(struct hl_device *hdev) 7982 { 7983 struct gaudi_device *gaudi = hdev->asic_specific; 7984 struct asic_fixed_properties *prop = &hdev->asic_prop; 7985 int rc; 7986 7987 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 7988 return 0; 7989 7990 rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, 7991 mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0, 7992 mmCPU_BOOT_ERR1); 7993 if (rc) 7994 return rc; 7995 7996 if (!strlen(prop->cpucp_info.card_name)) 7997 strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, 7998 CARD_NAME_MAX_LEN); 7999 8000 hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); 8001 8002 set_default_power_values(hdev); 8003 8004 return 0; 8005 } 8006 8007 static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8008 struct engines_data *e) 8009 { 8010 struct gaudi_device *gaudi = hdev->asic_specific; 8011 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; 8012 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n"; 8013 const char *nic_fmt = "%-5d%-9s%#-14x%#x\n"; 8014 unsigned long *mask = (unsigned long *)mask_arr; 8015 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts; 8016 bool is_idle = true, is_eng_idle, is_slave; 8017 u64 offset; 8018 
int i, dma_id, port; 8019 8020 if (e) 8021 hl_engine_data_sprintf(e, 8022 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8023 "--- ------- ------------ ---------- -------------\n"); 8024 8025 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) { 8026 dma_id = gaudi_dma_assignment[i]; 8027 offset = dma_id * DMA_QMAN_OFFSET; 8028 8029 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset); 8030 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset); 8031 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset); 8032 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8033 IS_DMA_IDLE(dma_core_sts0); 8034 is_idle &= is_eng_idle; 8035 8036 if (mask && !is_eng_idle) 8037 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8038 if (e) 8039 hl_engine_data_sprintf(e, fmt, dma_id, 8040 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8041 qm_cgm_sts, dma_core_sts0); 8042 } 8043 8044 if (e) 8045 hl_engine_data_sprintf(e, 8046 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8047 "--- ------- ------------ ---------- ----------\n"); 8048 8049 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) { 8050 offset = i * TPC_QMAN_OFFSET; 8051 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset); 8052 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset); 8053 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset); 8054 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) && 8055 IS_TPC_IDLE(tpc_cfg_sts); 8056 is_idle &= is_eng_idle; 8057 8058 if (mask && !is_eng_idle) 8059 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8060 if (e) 8061 hl_engine_data_sprintf(e, fmt, i, 8062 is_eng_idle ? "Y" : "N", 8063 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8064 } 8065 8066 if (e) 8067 hl_engine_data_sprintf(e, 8068 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8069 "--- ------- ------------ ---------- -----------\n"); 8070 8071 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) { 8072 offset = i * MME_QMAN_OFFSET; 8073 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset); 8074 is_eng_idle = IS_MME_IDLE(mme_arch_sts); 8075 8076 /* MME 1 & 3 are slaves, no need to check their QMANs */ 8077 is_slave = i % 2; 8078 if (!is_slave) { 8079 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset); 8080 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset); 8081 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8082 } 8083 8084 is_idle &= is_eng_idle; 8085 8086 if (mask && !is_eng_idle) 8087 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8088 if (e) { 8089 if (!is_slave) 8090 hl_engine_data_sprintf(e, fmt, i, 8091 is_eng_idle ? "Y" : "N", 8092 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8093 else 8094 hl_engine_data_sprintf(e, mme_slave_fmt, i, 8095 is_eng_idle ? "Y" : "N", "-", 8096 "-", mme_arch_sts); 8097 } 8098 } 8099 8100 if (e) 8101 hl_engine_data_sprintf(e, 8102 "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8103 "--- ------- ------------ ----------\n"); 8104 8105 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { 8106 offset = i * NIC_MACRO_QMAN_OFFSET; 8107 port = 2 * i; 8108 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8109 qm_glbl_sts0 = RREG32(mmNIC0_QM0_GLBL_STS0 + offset); 8110 qm_cgm_sts = RREG32(mmNIC0_QM0_CGM_STS + offset); 8111 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8112 is_idle &= is_eng_idle; 8113 8114 if (mask && !is_eng_idle) 8115 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8116 if (e) 8117 hl_engine_data_sprintf(e, nic_fmt, port, 8118 is_eng_idle ? 
"Y" : "N", 8119 qm_glbl_sts0, qm_cgm_sts); 8120 } 8121 8122 port = 2 * i + 1; 8123 if (gaudi->hw_cap_initialized & BIT(HW_CAP_NIC_SHIFT + port)) { 8124 qm_glbl_sts0 = RREG32(mmNIC0_QM1_GLBL_STS0 + offset); 8125 qm_cgm_sts = RREG32(mmNIC0_QM1_CGM_STS + offset); 8126 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts); 8127 is_idle &= is_eng_idle; 8128 8129 if (mask && !is_eng_idle) 8130 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8131 if (e) 8132 hl_engine_data_sprintf(e, nic_fmt, port, 8133 is_eng_idle ? "Y" : "N", 8134 qm_glbl_sts0, qm_cgm_sts); 8135 } 8136 } 8137 8138 if (e) 8139 hl_engine_data_sprintf(e, "\n"); 8140 8141 return is_idle; 8142 } 8143 8144 static void gaudi_hw_queues_lock(struct hl_device *hdev) 8145 __acquires(&gaudi->hw_queues_lock) 8146 { 8147 struct gaudi_device *gaudi = hdev->asic_specific; 8148 8149 spin_lock(&gaudi->hw_queues_lock); 8150 } 8151 8152 static void gaudi_hw_queues_unlock(struct hl_device *hdev) 8153 __releases(&gaudi->hw_queues_lock) 8154 { 8155 struct gaudi_device *gaudi = hdev->asic_specific; 8156 8157 spin_unlock(&gaudi->hw_queues_lock); 8158 } 8159 8160 static u32 gaudi_get_pci_id(struct hl_device *hdev) 8161 { 8162 return hdev->pdev->device; 8163 } 8164 8165 static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, 8166 size_t max_size) 8167 { 8168 struct gaudi_device *gaudi = hdev->asic_specific; 8169 8170 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8171 return 0; 8172 8173 return hl_fw_get_eeprom_data(hdev, data, max_size); 8174 } 8175 8176 static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) 8177 { 8178 struct gaudi_device *gaudi = hdev->asic_specific; 8179 8180 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) 8181 return 0; 8182 8183 return hl_fw_get_monitor_dump(hdev, data); 8184 } 8185 8186 /* 8187 * this function should be used only during initialization and/or after reset, 8188 * when there are no active users. 
8189 */ 8190 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel, u32 tpc_id) 8191 { 8192 u64 kernel_timeout; 8193 u32 status, offset; 8194 int rc; 8195 8196 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS); 8197 8198 if (hdev->pldm) 8199 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC; 8200 else 8201 kernel_timeout = HL_DEVICE_TIMEOUT_USEC; 8202 8203 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset, 8204 lower_32_bits(tpc_kernel)); 8205 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset, 8206 upper_32_bits(tpc_kernel)); 8207 8208 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset, 8209 lower_32_bits(tpc_kernel)); 8210 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset, 8211 upper_32_bits(tpc_kernel)); 8212 /* set a valid LUT pointer, content is of no significance */ 8213 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset, 8214 lower_32_bits(tpc_kernel)); 8215 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset, 8216 upper_32_bits(tpc_kernel)); 8217 8218 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset, 8219 lower_32_bits(CFG_BASE + 8220 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)); 8221 8222 WREG32(mmTPC0_CFG_TPC_CMD + offset, 8223 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT | 8224 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT)); 8225 /* wait a bit for the engine to start executing */ 8226 usleep_range(1000, 1500); 8227 8228 /* wait until engine has finished executing */ 8229 rc = hl_poll_timeout( 8230 hdev, 8231 mmTPC0_CFG_STATUS + offset, 8232 status, 8233 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8234 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8235 1000, 8236 kernel_timeout); 8237 8238 if (rc) { 8239 dev_err(hdev->dev, 8240 "Timeout while waiting for TPC%d icache prefetch\n", 8241 tpc_id); 8242 return -EIO; 8243 } 8244 8245 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset, 8246 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT); 8247 8248 /* wait a bit for the engine to start executing */ 8249 usleep_range(1000, 1500); 8250 8251 /* wait until engine has finished executing */ 8252 rc = hl_poll_timeout( 8253 hdev, 8254 mmTPC0_CFG_STATUS + offset, 8255 status, 8256 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) == 8257 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK, 8258 1000, 8259 kernel_timeout); 8260 8261 if (rc) { 8262 dev_err(hdev->dev, 8263 "Timeout while waiting for TPC%d vector pipe\n", 8264 tpc_id); 8265 return -EIO; 8266 } 8267 8268 rc = hl_poll_timeout( 8269 hdev, 8270 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset, 8271 status, 8272 (status == 0), 8273 1000, 8274 kernel_timeout); 8275 8276 if (rc) { 8277 dev_err(hdev->dev, 8278 "Timeout while waiting for TPC%d kernel to execute\n", 8279 tpc_id); 8280 return -EIO; 8281 } 8282 8283 return 0; 8284 } 8285 8286 static int gaudi_internal_cb_pool_init(struct hl_device *hdev, 8287 struct hl_ctx *ctx) 8288 { 8289 struct gaudi_device *gaudi = hdev->asic_specific; 8290 int min_alloc_order, rc, collective_cb_size; 8291 8292 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8293 return 0; 8294 8295 hdev->internal_cb_pool_virt_addr = hl_asic_dma_alloc_coherent(hdev, 8296 HOST_SPACE_INTERNAL_CB_SZ, 8297 &hdev->internal_cb_pool_dma_addr, 8298 GFP_KERNEL | __GFP_ZERO); 8299 8300 if (!hdev->internal_cb_pool_virt_addr) 8301 return -ENOMEM; 8302 8303 collective_cb_size = sizeof(struct packet_msg_short) * 5 + 8304 sizeof(struct packet_fence); 8305 min_alloc_order = ilog2(collective_cb_size); 8306 8307 hdev->internal_cb_pool = gen_pool_create(min_alloc_order, -1); 8308 if (!hdev->internal_cb_pool) { 8309 dev_err(hdev->dev, 8310 "Failed to 
create internal CB pool\n"); 8311 rc = -ENOMEM; 8312 goto free_internal_cb_pool; 8313 } 8314 8315 rc = gen_pool_add(hdev->internal_cb_pool, 8316 (uintptr_t) hdev->internal_cb_pool_virt_addr, 8317 HOST_SPACE_INTERNAL_CB_SZ, -1); 8318 if (rc) { 8319 dev_err(hdev->dev, 8320 "Failed to add memory to internal CB pool\n"); 8321 rc = -EFAULT; 8322 goto destroy_internal_cb_pool; 8323 } 8324 8325 hdev->internal_cb_va_base = hl_reserve_va_block(hdev, ctx, 8326 HL_VA_RANGE_TYPE_HOST, HOST_SPACE_INTERNAL_CB_SZ, 8327 HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 8328 8329 if (!hdev->internal_cb_va_base) { 8330 rc = -ENOMEM; 8331 goto destroy_internal_cb_pool; 8332 } 8333 8334 mutex_lock(&hdev->mmu_lock); 8335 8336 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8337 hdev->internal_cb_pool_dma_addr, 8338 HOST_SPACE_INTERNAL_CB_SZ); 8339 if (rc) 8340 goto unreserve_internal_cb_pool; 8341 8342 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8343 if (rc) 8344 goto unmap_internal_cb_pool; 8345 8346 mutex_unlock(&hdev->mmu_lock); 8347 8348 return 0; 8349 8350 unmap_internal_cb_pool: 8351 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8352 HOST_SPACE_INTERNAL_CB_SZ); 8353 unreserve_internal_cb_pool: 8354 mutex_unlock(&hdev->mmu_lock); 8355 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8356 HOST_SPACE_INTERNAL_CB_SZ); 8357 destroy_internal_cb_pool: 8358 gen_pool_destroy(hdev->internal_cb_pool); 8359 free_internal_cb_pool: 8360 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8361 hdev->internal_cb_pool_dma_addr); 8362 8363 return rc; 8364 } 8365 8366 static void gaudi_internal_cb_pool_fini(struct hl_device *hdev, 8367 struct hl_ctx *ctx) 8368 { 8369 struct gaudi_device *gaudi = hdev->asic_specific; 8370 8371 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8372 return; 8373 8374 mutex_lock(&hdev->mmu_lock); 8375 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8376 HOST_SPACE_INTERNAL_CB_SZ); 8377 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8378 HOST_SPACE_INTERNAL_CB_SZ); 8379 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8380 mutex_unlock(&hdev->mmu_lock); 8381 8382 gen_pool_destroy(hdev->internal_cb_pool); 8383 8384 hl_asic_dma_free_coherent(hdev, HOST_SPACE_INTERNAL_CB_SZ, hdev->internal_cb_pool_virt_addr, 8385 hdev->internal_cb_pool_dma_addr); 8386 } 8387 8388 static int gaudi_ctx_init(struct hl_ctx *ctx) 8389 { 8390 int rc; 8391 8392 if (ctx->asid == HL_KERNEL_ASID_ID) 8393 return 0; 8394 8395 rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx); 8396 if (rc) 8397 return rc; 8398 8399 rc = gaudi_restore_user_registers(ctx->hdev); 8400 if (rc) 8401 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8402 8403 return rc; 8404 } 8405 8406 static void gaudi_ctx_fini(struct hl_ctx *ctx) 8407 { 8408 if (ctx->asid == HL_KERNEL_ASID_ID) 8409 return; 8410 8411 gaudi_internal_cb_pool_fini(ctx->hdev, ctx); 8412 } 8413 8414 static int gaudi_pre_schedule_cs(struct hl_cs *cs) 8415 { 8416 return 0; 8417 } 8418 8419 static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx) 8420 { 8421 return gaudi_cq_assignment[cq_idx]; 8422 } 8423 8424 static u32 gaudi_get_signal_cb_size(struct hl_device *hdev) 8425 { 8426 return sizeof(struct packet_msg_short) + 8427 sizeof(struct packet_msg_prot) * 2; 8428 } 8429 8430 static u32 gaudi_get_wait_cb_size(struct hl_device *hdev) 8431 { 8432 return sizeof(struct packet_msg_short) * 4 + 8433 sizeof(struct packet_fence) + 8434 sizeof(struct packet_msg_prot) * 2; 8435 } 8436 8437 
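/*
 * Illustrative sketch, not used by the driver: the sizes returned just above
 * match the packets that gaudi_gen_signal_cb() and gaudi_gen_wait_cb() below
 * actually emit. A wait CB consists of three monitor-setup MSG_SHORT packets,
 * one ARM MSG_SHORT packet and one FENCE packet; the two MSG_PROT packets
 * counted in gaudi_get_wait_cb_size() are presumably appended later by the
 * common queue-submission code rather than generated here. The helper below
 * merely restates that accounting.
 */
static inline u32 gaudi_wait_cb_payload_size_sketch(void)
{
	/* 3 monitor-setup MSG_SHORTs + 1 ARM MSG_SHORT + 1 FENCE packet */
	return sizeof(struct packet_msg_short) * 4 +
			sizeof(struct packet_fence);
}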
static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id) 8438 { 8439 return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4); 8440 } 8441 8442 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, 8443 u32 size, bool eb) 8444 { 8445 struct hl_cb *cb = (struct hl_cb *) data; 8446 struct packet_msg_short *pkt; 8447 u32 value, ctl, pkt_size = sizeof(*pkt); 8448 8449 pkt = cb->kernel_address + size; 8450 memset(pkt, 0, pkt_size); 8451 8452 /* Inc by 1, Mode ADD */ 8453 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_MASK, 1); 8454 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_SOB_MOD_MASK, 1); 8455 8456 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4); 8457 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8458 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */ 8459 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8460 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb); 8461 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8462 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8463 8464 pkt->value = cpu_to_le32(value); 8465 pkt->ctl = cpu_to_le32(ctl); 8466 8467 return size + pkt_size; 8468 } 8469 8470 static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value, 8471 u16 addr) 8472 { 8473 u32 ctl, pkt_size = sizeof(*pkt); 8474 8475 memset(pkt, 0, pkt_size); 8476 8477 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr); 8478 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8479 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8480 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8481 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8482 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */ 8483 8484 pkt->value = cpu_to_le32(value); 8485 pkt->ctl = cpu_to_le32(ctl); 8486 8487 return pkt_size; 8488 } 8489 8490 static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev, 8491 struct packet_msg_short *pkt, u16 sob_base, u8 sob_mask, 8492 u16 sob_val, u16 mon_id) 8493 { 8494 u64 monitor_base; 8495 u32 ctl, value, pkt_size = sizeof(*pkt); 8496 u16 msg_addr_offset; 8497 u8 mask; 8498 8499 if (hl_gen_sob_mask(sob_base, sob_mask, &mask)) { 8500 dev_err(hdev->dev, 8501 "sob_base %u (mask %#x) is not valid\n", 8502 sob_base, sob_mask); 8503 return 0; 8504 } 8505 8506 /* 8507 * monitor_base should be the content of the base0 address registers, 8508 * so it will be added to the msg short offsets 8509 */ 8510 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0; 8511 8512 msg_addr_offset = 8513 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) - 8514 monitor_base; 8515 8516 memset(pkt, 0, pkt_size); 8517 8518 /* Monitor config packet: bind the monitor to a sync object */ 8519 value = FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_MASK, sob_base / 8); 8520 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_MASK, sob_val); 8521 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MODE_MASK, 8522 0); /* GREATER OR EQUAL*/ 8523 value |= FIELD_PREP(GAUDI_PKT_SHORT_VAL_MON_MASK_MASK, mask); 8524 8525 ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset); 8526 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */ 8527 ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */ 8528 ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT); 8529 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8530 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8531 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8532 8533 pkt->value = 
cpu_to_le32(value); 8534 pkt->ctl = cpu_to_le32(ctl); 8535 8536 return pkt_size; 8537 } 8538 8539 static u32 gaudi_add_fence_pkt(struct packet_fence *pkt) 8540 { 8541 u32 ctl, cfg, pkt_size = sizeof(*pkt); 8542 8543 memset(pkt, 0, pkt_size); 8544 8545 cfg = FIELD_PREP(GAUDI_PKT_FENCE_CFG_DEC_VAL_MASK, 1); 8546 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1); 8547 cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2); 8548 8549 ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE); 8550 ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0); 8551 ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1); 8552 ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1); 8553 8554 pkt->cfg = cpu_to_le32(cfg); 8555 pkt->ctl = cpu_to_le32(ctl); 8556 8557 return pkt_size; 8558 } 8559 8560 static int gaudi_get_fence_addr(struct hl_device *hdev, u32 queue_id, u64 *addr) 8561 { 8562 u32 offset, nic_index; 8563 8564 switch (queue_id) { 8565 case GAUDI_QUEUE_ID_DMA_0_0: 8566 offset = mmDMA0_QM_CP_FENCE2_RDATA_0; 8567 break; 8568 case GAUDI_QUEUE_ID_DMA_0_1: 8569 offset = mmDMA0_QM_CP_FENCE2_RDATA_1; 8570 break; 8571 case GAUDI_QUEUE_ID_DMA_0_2: 8572 offset = mmDMA0_QM_CP_FENCE2_RDATA_2; 8573 break; 8574 case GAUDI_QUEUE_ID_DMA_0_3: 8575 offset = mmDMA0_QM_CP_FENCE2_RDATA_3; 8576 break; 8577 case GAUDI_QUEUE_ID_DMA_1_0: 8578 offset = mmDMA1_QM_CP_FENCE2_RDATA_0; 8579 break; 8580 case GAUDI_QUEUE_ID_DMA_1_1: 8581 offset = mmDMA1_QM_CP_FENCE2_RDATA_1; 8582 break; 8583 case GAUDI_QUEUE_ID_DMA_1_2: 8584 offset = mmDMA1_QM_CP_FENCE2_RDATA_2; 8585 break; 8586 case GAUDI_QUEUE_ID_DMA_1_3: 8587 offset = mmDMA1_QM_CP_FENCE2_RDATA_3; 8588 break; 8589 case GAUDI_QUEUE_ID_DMA_5_0: 8590 offset = mmDMA5_QM_CP_FENCE2_RDATA_0; 8591 break; 8592 case GAUDI_QUEUE_ID_DMA_5_1: 8593 offset = mmDMA5_QM_CP_FENCE2_RDATA_1; 8594 break; 8595 case GAUDI_QUEUE_ID_DMA_5_2: 8596 offset = mmDMA5_QM_CP_FENCE2_RDATA_2; 8597 break; 8598 case GAUDI_QUEUE_ID_DMA_5_3: 8599 offset = mmDMA5_QM_CP_FENCE2_RDATA_3; 8600 break; 8601 case GAUDI_QUEUE_ID_TPC_7_0: 8602 offset = mmTPC7_QM_CP_FENCE2_RDATA_0; 8603 break; 8604 case GAUDI_QUEUE_ID_TPC_7_1: 8605 offset = mmTPC7_QM_CP_FENCE2_RDATA_1; 8606 break; 8607 case GAUDI_QUEUE_ID_TPC_7_2: 8608 offset = mmTPC7_QM_CP_FENCE2_RDATA_2; 8609 break; 8610 case GAUDI_QUEUE_ID_TPC_7_3: 8611 offset = mmTPC7_QM_CP_FENCE2_RDATA_3; 8612 break; 8613 case GAUDI_QUEUE_ID_NIC_0_0: 8614 case GAUDI_QUEUE_ID_NIC_1_0: 8615 case GAUDI_QUEUE_ID_NIC_2_0: 8616 case GAUDI_QUEUE_ID_NIC_3_0: 8617 case GAUDI_QUEUE_ID_NIC_4_0: 8618 case GAUDI_QUEUE_ID_NIC_5_0: 8619 case GAUDI_QUEUE_ID_NIC_6_0: 8620 case GAUDI_QUEUE_ID_NIC_7_0: 8621 case GAUDI_QUEUE_ID_NIC_8_0: 8622 case GAUDI_QUEUE_ID_NIC_9_0: 8623 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_0) >> 2; 8624 offset = mmNIC0_QM0_CP_FENCE2_RDATA_0 + 8625 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8626 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8627 break; 8628 case GAUDI_QUEUE_ID_NIC_0_1: 8629 case GAUDI_QUEUE_ID_NIC_1_1: 8630 case GAUDI_QUEUE_ID_NIC_2_1: 8631 case GAUDI_QUEUE_ID_NIC_3_1: 8632 case GAUDI_QUEUE_ID_NIC_4_1: 8633 case GAUDI_QUEUE_ID_NIC_5_1: 8634 case GAUDI_QUEUE_ID_NIC_6_1: 8635 case GAUDI_QUEUE_ID_NIC_7_1: 8636 case GAUDI_QUEUE_ID_NIC_8_1: 8637 case GAUDI_QUEUE_ID_NIC_9_1: 8638 nic_index = (queue_id - GAUDI_QUEUE_ID_NIC_0_1) >> 2; 8639 offset = mmNIC0_QM0_CP_FENCE2_RDATA_1 + 8640 (nic_index >> 1) * NIC_MACRO_QMAN_OFFSET + 8641 (nic_index & 0x1) * NIC_ENGINE_QMAN_OFFSET; 8642 break; 8643 case GAUDI_QUEUE_ID_NIC_0_2: 8644 case GAUDI_QUEUE_ID_NIC_1_2: 8645 case GAUDI_QUEUE_ID_NIC_2_2: 8646 case 
static u32 gaudi_add_mon_pkts(void *buf, u16 mon_id, u64 fence_addr)
{
	u64 monitor_base;
	u32 size = 0;
	u16 msg_addr_offset;

	/*
	 * monitor_base should be the content of the base0 address registers,
	 * so it will be added to the msg short offsets
	 */
	monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;

	/* First monitor config packet: low address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
					msg_addr_offset);

	/* Second monitor config packet: high address of the sync */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
					msg_addr_offset);

	/*
	 * Third monitor config packet: the payload, i.e. what to write when
	 * the sync triggers
	 */
	msg_addr_offset =
		(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
				monitor_base;

	size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);

	return size;
}
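
/*
 * gaudi_gen_wait_cb() - generate a wait CB for the properties in @prop.
 *
 * Appends, at offset prop->size inside the CB, the monitor setup packets,
 * the ARM monitor packet and a final FENCE packet. Returns the updated CB
 * size, or 0 if the queue id has no fence address and is therefore not a
 * valid target for a wait packet.
 */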
static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
				struct hl_gen_wait_properties *prop)
{
	struct hl_cb *cb = (struct hl_cb *) prop->data;
	void *buf = cb->kernel_address;
	u64 fence_addr = 0;
	u32 size = prop->size;

	if (gaudi_get_fence_addr(hdev, prop->q_idx, &fence_addr)) {
		dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
				prop->q_idx);
		return 0;
	}

	size += gaudi_add_mon_pkts(buf + size, prop->mon_id, fence_addr);
	size += gaudi_add_arm_monitor_pkt(hdev, buf + size, prop->sob_base,
			prop->sob_mask, prop->sob_val, prop->mon_id);
	size += gaudi_add_fence_pkt(buf + size);

	return size;
}

static void gaudi_reset_sob(struct hl_device *hdev, void *data)
{
	struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;

	dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
						hw_sob->sob_id);

	WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
			hw_sob->sob_id * 4, 0);

	kref_init(&hw_sob->kref);
}

static u64 gaudi_get_device_time(struct hl_device *hdev)
{
	u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;

	return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
}

static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
				u32 *block_size, u32 *block_id)
{
	return -EPERM;
}

static int gaudi_block_mmap(struct hl_device *hdev,
				struct vm_area_struct *vma,
				u32 block_id, u32 block_size)
{
	return -EPERM;
}

static void gaudi_enable_events_from_fw(struct hl_device *hdev)
{
	struct cpu_dyn_regs *dyn_regs =
			&hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
	u32 irq_handler_offset = hdev->asic_prop.gic_interrupts_enable ?
			mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR :
			le32_to_cpu(dyn_regs->gic_host_ints_irq);

	WREG32(irq_handler_offset,
		gaudi_irq_map_table[GAUDI_EVENT_INTS_REGISTER].cpu_id);
}

static int gaudi_ack_mmu_page_fault_or_access_error(struct hl_device *hdev, u64 mmu_cap_mask)
{
	return -EINVAL;
}

static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
{
	switch (pll_idx) {
	case HL_GAUDI_CPU_PLL: return CPU_PLL;
	case HL_GAUDI_PCI_PLL: return PCI_PLL;
	case HL_GAUDI_NIC_PLL: return NIC_PLL;
	case HL_GAUDI_DMA_PLL: return DMA_PLL;
	case HL_GAUDI_MESH_PLL: return MESH_PLL;
	case HL_GAUDI_MME_PLL: return MME_PLL;
	case HL_GAUDI_TPC_PLL: return TPC_PLL;
	case HL_GAUDI_IF_PLL: return IF_PLL;
	case HL_GAUDI_SRAM_PLL: return SRAM_PLL;
	case HL_GAUDI_HBM_PLL: return HBM_PLL;
	default: return -EINVAL;
	}
}

static int gaudi_add_sync_to_engine_map_entry(
	struct hl_sync_to_engine_map *map, u32 reg_value,
	enum hl_sync_engine_type engine_type, u32 engine_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	/* Reg value represents a partial address of sync object,
	 * it is used as unique identifier. For this we need to
	 * clear the cutoff cfg base bits from the value.
	 */
	if (reg_value == 0 || reg_value == 0xffffffff)
		return 0;
	reg_value -= lower_32_bits(CFG_BASE);

	/* create a new hash entry */
	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;
	entry->engine_type = engine_type;
	entry->engine_id = engine_id;
	entry->sync_id = reg_value;
	hash_add(map->tb, &entry->node, reg_value);

	return 0;
}
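
/*
 * gaudi_gen_sync_to_engine_map() - build the sync-object-to-engine map used
 * by the state dump.
 *
 * Reads the configured sync object register of every TPC, MME and DMA
 * engine and adds a map entry per engine. On allocation failure the partial
 * map is freed and the error is returned.
 */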
static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
				struct hl_sync_to_engine_map *map)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i, j, rc;
	u32 reg_value;

	/* Iterate over TPC engines */
	for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {

		reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
					sds->props[SP_NEXT_TPC] * i);

		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_TPC, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	/* Iterate over MME engines */
	for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
		for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {

			reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
						sds->props[SP_NEXT_MME] * i +
						j * sizeof(u32));

			rc = gaudi_add_sync_to_engine_map_entry(
				map, reg_value, ENGINE_MME,
				i * sds->props[SP_SUB_MME_ENG_NUM] + j);
			if (rc)
				goto free_sync_to_engine_map;
		}
	}

	/* Iterate over DMA engines */
	for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
		reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
					sds->props[SP_DMA_QUEUES_OFFSET] * i);
		rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
							ENGINE_DMA, i);
		if (rc)
			goto free_sync_to_engine_map;
	}

	return 0;

free_sync_to_engine_map:
	hl_state_dump_free_sync_to_engine_map(map);

	return rc;
}

static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
{
	return FIELD_GET(
		SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
		mon->status);
}

static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
{
	const size_t max_write = 10;
	u32 gid, mask, sob;
	int i, offset;

	/* Sync object ID is calculated as follows:
	 * (8 * group_id + cleared bits in mask)
	 */
	gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
			mon->arm_data);
	mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
			mon->arm_data);

	for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
		max_write; mask >>= 1, i++) {
		if (!(mask & 1)) {
			sob = gid * MONITOR_MAX_SOBS + i;

			if (offset > 0)
				offset += snprintf(sobs + offset, max_write,
							", ");

			offset += snprintf(sobs + offset, max_write, "%u", sob);
		}
	}
}

static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
				struct hl_device *hdev,
				struct hl_mon_state_dump *mon)
{
	const char *name;
	char scratch_buf1[BIN_REG_STRING_SIZE],
		scratch_buf2[BIN_REG_STRING_SIZE];
	char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};

	name = hl_state_dump_get_monitor_name(hdev, mon);
	if (!name)
		name = "";

	gaudi_fill_sobs_from_mon(monitored_sobs, mon);

	return hl_snprintf_resize(
		buf, size, offset,
		"Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
		mon->id, name,
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
				mon->arm_data),
		hl_format_as_binary(
			scratch_buf1, sizeof(scratch_buf1),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
				mon->arm_data)),
		FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
				mon->arm_data),
		mon->wr_data,
		(((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
		hl_format_as_binary(
			scratch_buf2, sizeof(scratch_buf2),
			FIELD_GET(
				SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
				mon->status)),
		monitored_sobs);
}
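
/*
 * gaudi_print_fences_single_engine() - dump the fence state of one engine's
 * QMAN.
 *
 * Reads the CP status and fence counter registers of the engine and, for
 * every stream with a fence in progress, appends a line describing the
 * fence id, its counter/rdata addresses and the current values to the
 * resizable dump buffer.
 */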
static int gaudi_print_fences_single_engine(
	struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
	enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
	size_t *size, size_t *offset)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int rc = -ENOMEM, i;
	u32 *statuses, *fences;

	statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
			sizeof(*statuses), GFP_KERNEL);
	if (!statuses)
		goto out;

	fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES],
			 sizeof(*fences), GFP_KERNEL);
	if (!fences)
		goto free_status;

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
		statuses[i] = RREG32(status_base_offset + i * sizeof(u32));

	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
				sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
		fences[i] = RREG32(base_offset + i * sizeof(u32));

	/* The actual print */
	for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
		u32 fence_id;
		u64 fence_cnt, fence_rdata;
		const char *engine_name;

		if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
			statuses[i]))
			continue;

		fence_id =
			FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
		fence_cnt = base_offset + CFG_BASE +
			sizeof(u32) *
			(i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
		fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
				sds->props[SP_FENCE0_RDATA_OFFSET];
		engine_name = hl_sync_engine_to_string(engine_type);

		rc = hl_snprintf_resize(
			buf, size, offset,
			"%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
			engine_name, engine_id,
			i, fence_id,
			fence_cnt, engine_name, engine_id, fence_id, i,
			fence_rdata, engine_name, engine_id, fence_id, i,
			fences[fence_id],
			statuses[i]);
		if (rc)
			goto free_fences;
	}

	rc = 0;

free_fences:
	kfree(fences);
free_status:
	kfree(statuses);
out:
	return rc;
}


static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
	.monitor_valid = gaudi_monitor_valid,
	.print_single_monitor = gaudi_print_single_monitor,
	.gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
	.print_fences_single_engine = gaudi_print_fences_single_engine,
};

static void gaudi_state_dump_init(struct hl_device *hdev)
{
	struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
	int i;

	for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
		hash_add(sds->so_id_to_str_tb,
			&gaudi_so_id_to_str[i].node,
			gaudi_so_id_to_str[i].id);

	for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
		hash_add(sds->monitor_id_to_str_tb,
			&gaudi_monitor_id_to_str[i].node,
			gaudi_monitor_id_to_str[i].id);

	sds->props = gaudi_state_dump_specs_props;

	sds->sync_namager_names = gaudi_sync_manager_names;

	sds->funcs = gaudi_state_dump_funcs;
}
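
/*
 * gaudi_get_stream_master_qid_arr() - return the gaudi_stream_master array
 * of queue IDs for the common code's .get_stream_master_qid_arr callback.
 */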
static u32 *gaudi_get_stream_master_qid_arr(void)
{
	return gaudi_stream_master;
}

static int gaudi_set_dram_properties(struct hl_device *hdev)
{
	return 0;
}

static int gaudi_set_binning_masks(struct hl_device *hdev)
{
	return 0;
}

static void gaudi_check_if_razwi_happened(struct hl_device *hdev)
{
}

static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf)
{
	struct hl_device *hdev = dev_get_drvdata(dev);
	struct cpucp_info *cpucp_info;

	cpucp_info = &hdev->asic_prop.cpucp_info;

	return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version));
}

static DEVICE_ATTR_RO(infineon_ver);

static struct attribute *gaudi_vrm_dev_attrs[] = {
	&dev_attr_infineon_ver.attr,
	NULL,
};

static void gaudi_add_device_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp,
				struct attribute_group *dev_vrm_attr_grp)
{
	hl_sysfs_add_dev_clk_attr(hdev, dev_clk_attr_grp);
	dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs;
}

static int gaudi_send_device_activity(struct hl_device *hdev, bool open)
{
	return 0;
}
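
/*
 * gaudi_funcs - the ASIC-specific function table through which the common
 * habanalabs code operates a GAUDI device. Entries set to NULL are
 * operations that are not implemented for this ASIC. The table is installed
 * by gaudi_set_asic_funcs() below.
 */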
static const struct hl_asic_funcs gaudi_funcs = {
	.early_init = gaudi_early_init,
	.early_fini = gaudi_early_fini,
	.late_init = gaudi_late_init,
	.late_fini = gaudi_late_fini,
	.sw_init = gaudi_sw_init,
	.sw_fini = gaudi_sw_fini,
	.hw_init = gaudi_hw_init,
	.hw_fini = gaudi_hw_fini,
	.halt_engines = gaudi_halt_engines,
	.suspend = gaudi_suspend,
	.resume = gaudi_resume,
	.mmap = gaudi_mmap,
	.ring_doorbell = gaudi_ring_doorbell,
	.pqe_write = gaudi_pqe_write,
	.asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
	.asic_dma_free_coherent = gaudi_dma_free_coherent,
	.scrub_device_mem = gaudi_scrub_device_mem,
	.scrub_device_dram = gaudi_scrub_device_dram,
	.get_int_queue_base = gaudi_get_int_queue_base,
	.test_queues = gaudi_test_queues,
	.asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
	.asic_dma_pool_free = gaudi_dma_pool_free,
	.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
	.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
	.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
	.cs_parser = gaudi_cs_parser,
	.dma_map_sgtable = hl_asic_dma_map_sgtable,
	.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
	.update_eq_ci = gaudi_update_eq_ci,
	.context_switch = gaudi_context_switch,
	.restore_phase_topology = gaudi_restore_phase_topology,
	.debugfs_read_dma = gaudi_debugfs_read_dma,
	.add_device_attr = gaudi_add_device_attr,
	.handle_eqe = gaudi_handle_eqe,
	.get_events_stat = gaudi_get_events_stat,
	.read_pte = gaudi_read_pte,
	.write_pte = gaudi_write_pte,
	.mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
	.mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
	.mmu_prefetch_cache_range = NULL,
	.send_heartbeat = gaudi_send_heartbeat,
	.debug_coresight = gaudi_debug_coresight,
	.is_device_idle = gaudi_is_device_idle,
	.compute_reset_late_init = gaudi_compute_reset_late_init,
	.hw_queues_lock = gaudi_hw_queues_lock,
	.hw_queues_unlock = gaudi_hw_queues_unlock,
	.get_pci_id = gaudi_get_pci_id,
	.get_eeprom_data = gaudi_get_eeprom_data,
	.get_monitor_dump = gaudi_get_monitor_dump,
	.send_cpu_message = gaudi_send_cpu_message,
	.pci_bars_map = gaudi_pci_bars_map,
	.init_iatu = gaudi_init_iatu,
	.rreg = hl_rreg,
	.wreg = hl_wreg,
	.halt_coresight = gaudi_halt_coresight,
	.ctx_init = gaudi_ctx_init,
	.ctx_fini = gaudi_ctx_fini,
	.pre_schedule_cs = gaudi_pre_schedule_cs,
	.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
	.load_firmware_to_device = gaudi_load_firmware_to_device,
	.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
	.get_signal_cb_size = gaudi_get_signal_cb_size,
	.get_wait_cb_size = gaudi_get_wait_cb_size,
	.gen_signal_cb = gaudi_gen_signal_cb,
	.gen_wait_cb = gaudi_gen_wait_cb,
	.reset_sob = gaudi_reset_sob,
	.reset_sob_group = gaudi_reset_sob_group,
	.get_device_time = gaudi_get_device_time,
	.pb_print_security_errors = NULL,
	.collective_wait_init_cs = gaudi_collective_wait_init_cs,
	.collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
	.get_dec_base_addr = NULL,
	.scramble_addr = hl_mmu_scramble_addr,
	.descramble_addr = hl_mmu_descramble_addr,
	.ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
	.get_hw_block_id = gaudi_get_hw_block_id,
	.hw_block_mmap = gaudi_block_mmap,
	.enable_events_from_fw = gaudi_enable_events_from_fw,
	.ack_mmu_errors = gaudi_ack_mmu_page_fault_or_access_error,
	.map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
	.init_firmware_preload_params = gaudi_init_firmware_preload_params,
	.init_firmware_loader = gaudi_init_firmware_loader,
	.init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
	.state_dump_init = gaudi_state_dump_init,
	.get_sob_addr = gaudi_get_sob_addr,
	.set_pci_memory_regions = gaudi_set_pci_memory_regions,
	.get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr,
	.check_if_razwi_happened = gaudi_check_if_razwi_happened,
	.mmu_get_real_page_size = hl_mmu_get_real_page_size,
	.access_dev_mem = hl_access_dev_mem,
	.set_dram_bar_base = gaudi_set_hbm_bar_base,
	.send_device_activity = gaudi_send_device_activity,
	.set_dram_properties = gaudi_set_dram_properties,
	.set_binning_masks = gaudi_set_binning_masks,
};

/**
 * gaudi_set_asic_funcs - set GAUDI function pointers
 *
 * @hdev: pointer to hl_device structure
 *
 */
void gaudi_set_asic_funcs(struct hl_device *hdev)
{
	hdev->asic_funcs = &gaudi_funcs;
}