1 /* SPDX-License-Identifier: MIT */ 2 /* 3 * Copyright 2025 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #ifndef __RAS_ACA_H__ 26 #define __RAS_ACA_H__ 27 #include "ras.h" 28 29 #define MAX_SOCKET_NUM_PER_HIVE 8 30 #define MAX_AID_NUM_PER_SOCKET 4 31 #define MAX_XCD_NUM_PER_AID 2 32 #define MAX_ACA_RAS_BLOCK 20 33 34 #define ACA_ERROR__UE_MASK (0x1 << RAS_ERR_TYPE__UE) 35 #define ACA_ERROR__CE_MASK (0x1 << RAS_ERR_TYPE__CE) 36 #define ACA_ERROR__DE_MASK (0x1 << RAS_ERR_TYPE__DE) 37 38 enum ras_aca_reg_idx { 39 ACA_REG_IDX__CTL = 0, 40 ACA_REG_IDX__STATUS = 1, 41 ACA_REG_IDX__ADDR = 2, 42 ACA_REG_IDX__MISC0 = 3, 43 ACA_REG_IDX__CONFG = 4, 44 ACA_REG_IDX__IPID = 5, 45 ACA_REG_IDX__SYND = 6, 46 ACA_REG_IDX__DESTAT = 8, 47 ACA_REG_IDX__DEADDR = 9, 48 ACA_REG_IDX__CTL_MASK = 10, 49 ACA_REG_MAX_COUNT = 16, 50 }; 51 52 struct ras_core_context; 53 struct aca_block; 54 55 struct aca_bank_reg { 56 u32 ecc_type; 57 u64 seq_no; 58 u64 regs[ACA_REG_MAX_COUNT]; 59 }; 60 61 enum aca_ecc_hwip { 62 ACA_ECC_HWIP__UNKNOWN = -1, 63 ACA_ECC_HWIP__PSP = 0, 64 ACA_ECC_HWIP__UMC, 65 ACA_ECC_HWIP__SMU, 66 ACA_ECC_HWIP__PCS_XGMI, 67 ACA_ECC_HWIP_COUNT, 68 }; 69 70 struct aca_ecc_info { 71 int die_id; 72 int socket_id; 73 int xcd_id; 74 int hwid; 75 int mcatype; 76 uint64_t status; 77 uint64_t ipid; 78 uint64_t addr; 79 }; 80 81 struct aca_bank_ecc { 82 struct aca_ecc_info bank_info; 83 u32 ce_count; 84 u32 ue_count; 85 u32 de_count; 86 }; 87 88 struct aca_ecc_count { 89 u32 new_ce_count; 90 u32 total_ce_count; 91 u32 new_ue_count; 92 u32 total_ue_count; 93 u32 new_de_count; 94 u32 total_de_count; 95 }; 96 97 struct aca_xcd_ecc { 98 struct aca_ecc_count ecc_err; 99 }; 100 101 struct aca_aid_ecc { 102 union { 103 struct aca_xcd { 104 struct aca_xcd_ecc xcd[MAX_XCD_NUM_PER_AID]; 105 u32 xcd_num; 106 } xcd; 107 struct aca_ecc_count ecc_err; 108 }; 109 }; 110 111 struct aca_socket_ecc { 112 struct aca_aid_ecc aid[MAX_AID_NUM_PER_SOCKET]; 113 u32 aid_num; 114 }; 115 116 struct aca_block_ecc { 117 struct aca_socket_ecc socket[MAX_SOCKET_NUM_PER_HIVE]; 118 u32 socket_num_per_hive; 119 }; 120 121 struct aca_bank_hw_ops { 122 bool (*bank_match)(struct aca_block *ras_blk, void *data); 123 int (*bank_parse)(struct ras_core_context *ras_core, 124 struct aca_block *aca_blk, void *data, void *buf); 125 }; 126 127 struct aca_block_info { 128 char name[32]; 129 u32 ras_block_id; 130 enum aca_ecc_hwip hwip; 131 struct aca_bank_hw_ops bank_ops; 132 u32 mask; 133 }; 134 135 struct aca_block { 136 const struct aca_block_info *blk_info; 137 struct aca_block_ecc ecc; 138 }; 139 140 struct ras_aca_ip_func { 141 uint32_t block_num; 142 const struct aca_block_info **block_info; 143 }; 144 145 struct ras_aca { 146 uint32_t aca_ip_version; 147 const struct ras_aca_ip_func *ip_func; 148 struct mutex aca_lock; 149 struct mutex bank_op_lock; 150 struct aca_block aca_blk[MAX_ACA_RAS_BLOCK]; 151 uint32_t ue_updated_mark; 152 }; 153 154 int ras_aca_sw_init(struct ras_core_context *ras_core); 155 int ras_aca_sw_fini(struct ras_core_context *ras_core); 156 int ras_aca_hw_init(struct ras_core_context *ras_core); 157 int ras_aca_hw_fini(struct ras_core_context *ras_core); 158 int ras_aca_get_block_ecc_count(struct ras_core_context *ras_core, u32 blk, void *data); 159 int ras_aca_clear_block_new_ecc_count(struct ras_core_context *ras_core, u32 blk); 160 int ras_aca_clear_all_blocks_ecc_count(struct ras_core_context *ras_core); 161 int ras_aca_update_ecc(struct ras_core_context *ras_core, u32 ecc_type, void *data); 162 void ras_aca_mark_fatal_flag(struct ras_core_context *ras_core); 163 void ras_aca_clear_fatal_flag(struct ras_core_context *ras_core); 164 #endif 165