1 /* SPDX-License-Identifier: MIT */ 2 /* 3 * Copyright 2025 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #ifndef __RAS_UMC_H__ 26 #define __RAS_UMC_H__ 27 #include "ras.h" 28 #include "ras_eeprom.h" 29 #include "ras_cmd.h" 30 31 #define UMC_VRAM_TYPE_UNKNOWN 0 32 #define UMC_VRAM_TYPE_GDDR1 1 33 #define UMC_VRAM_TYPE_DDR2 2 34 #define UMC_VRAM_TYPE_GDDR3 3 35 #define UMC_VRAM_TYPE_GDDR4 4 36 #define UMC_VRAM_TYPE_GDDR5 5 37 #define UMC_VRAM_TYPE_HBM 6 38 #define UMC_VRAM_TYPE_DDR3 7 39 #define UMC_VRAM_TYPE_DDR4 8 40 #define UMC_VRAM_TYPE_GDDR6 9 41 #define UMC_VRAM_TYPE_DDR5 10 42 #define UMC_VRAM_TYPE_LPDDR4 11 43 #define UMC_VRAM_TYPE_LPDDR5 12 44 #define UMC_VRAM_TYPE_HBM3E 13 45 46 #define UMC_ECC_NEW_DETECTED_TAG 0x1 47 #define UMC_INV_MEM_PFN (0xFFFFFFFFFFFFFFFF) 48 49 /* three column bits and one row bit in MCA address flip 50 * in bad page retirement 51 */ 52 #define UMC_PA_FLIP_BITS_NUM 4 53 54 enum umc_memory_partition_mode { 55 UMC_MEMORY_PARTITION_MODE_NONE = 0, 56 UMC_MEMORY_PARTITION_MODE_NPS1 = 1, 57 UMC_MEMORY_PARTITION_MODE_NPS2 = 2, 58 UMC_MEMORY_PARTITION_MODE_NPS3 = 3, 59 UMC_MEMORY_PARTITION_MODE_NPS4 = 4, 60 UMC_MEMORY_PARTITION_MODE_NPS6 = 6, 61 UMC_MEMORY_PARTITION_MODE_NPS8 = 8, 62 UMC_MEMORY_PARTITION_MODE_UNKNOWN 63 }; 64 65 struct ras_core_context; 66 struct ras_bank_ecc; 67 68 struct umc_flip_bits { 69 uint32_t flip_bits_in_pa[UMC_PA_FLIP_BITS_NUM]; 70 uint32_t flip_row_bit; 71 uint32_t r13_in_pa; 72 uint32_t bit_num; 73 }; 74 75 struct umc_mca_addr { 76 uint64_t err_addr; 77 uint32_t ch_inst; 78 uint32_t umc_inst; 79 uint32_t node_inst; 80 uint32_t socket_id; 81 }; 82 83 struct umc_phy_addr { 84 uint64_t pa; 85 uint32_t bank; 86 uint32_t channel_idx; 87 }; 88 89 struct umc_bank_addr { 90 uint32_t stack_id; /* SID */ 91 uint32_t bank_group; 92 uint32_t bank; 93 uint32_t row; 94 uint32_t column; 95 uint32_t channel; 96 uint32_t subchannel; /* Also called Pseudochannel (PC) */ 97 }; 98 99 struct ras_umc_ip_func { 100 int (*bank_to_eeprom_record)(struct ras_core_context *ras_core, 101 struct ras_bank_ecc *bank, struct eeprom_umc_record *record); 102 int (*eeprom_record_to_nps_record)(struct ras_core_context *ras_core, 103 struct eeprom_umc_record *record, uint32_t nps); 104 int (*eeprom_record_to_nps_pages)(struct ras_core_context *ras_core, 105 struct eeprom_umc_record *record, uint32_t nps, 106 uint64_t *pfns, uint32_t num); 107 int (*bank_to_soc_pa)(struct ras_core_context *ras_core, 108 struct umc_bank_addr bank_addr, uint64_t *soc_pa); 109 int (*soc_pa_to_bank)(struct ras_core_context *ras_core, 110 uint64_t soc_pa, struct umc_bank_addr *bank_addr); 111 }; 112 113 struct eeprom_store_record { 114 /* point to data records array */ 115 struct eeprom_umc_record *bps; 116 /* the count of entries */ 117 int count; 118 /* the space can place new entries */ 119 int space_left; 120 }; 121 122 struct ras_umc_err_data { 123 struct eeprom_store_record rom_data; 124 struct eeprom_store_record ram_data; 125 enum umc_memory_partition_mode umc_nps_mode; 126 uint64_t last_retired_pfn; 127 }; 128 129 struct ras_umc { 130 u32 umc_ip_version; 131 u32 umc_vram_type; 132 const struct ras_umc_ip_func *ip_func; 133 struct radix_tree_root root; 134 struct mutex tree_lock; 135 struct mutex umc_lock; 136 struct mutex bank_log_lock; 137 struct mutex pending_ecc_lock; 138 struct ras_umc_err_data umc_err_data; 139 struct list_head pending_ecc_list; 140 }; 141 142 int ras_umc_sw_init(struct ras_core_context *ras); 143 int ras_umc_sw_fini(struct ras_core_context *ras); 144 int ras_umc_hw_init(struct ras_core_context *ras); 145 int ras_umc_hw_fini(struct ras_core_context *ras); 146 int ras_umc_psp_convert_ma_to_pa(struct ras_core_context *ras_core, 147 struct umc_mca_addr *in, struct umc_phy_addr *out, 148 uint32_t nps); 149 int ras_umc_handle_bad_pages(struct ras_core_context *ras_core, void *data); 150 int ras_umc_log_bad_bank(struct ras_core_context *ras, struct ras_bank_ecc *bank); 151 int ras_umc_log_bad_bank_pending(struct ras_core_context *ras_core, struct ras_bank_ecc *bank); 152 int ras_umc_log_pending_bad_bank(struct ras_core_context *ras_core); 153 int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core); 154 int ras_umc_load_bad_pages(struct ras_core_context *ras_core); 155 int ras_umc_get_saved_eeprom_count(struct ras_core_context *ras_core); 156 int ras_umc_clean_badpage_data(struct ras_core_context *ras_core); 157 int ras_umc_fill_eeprom_record(struct ras_core_context *ras_core, 158 uint64_t err_addr, uint32_t umc_inst, struct umc_phy_addr *cur_nps_addr, 159 enum umc_memory_partition_mode cur_nps, struct eeprom_umc_record *record); 160 161 int ras_umc_get_badpage_count(struct ras_core_context *ras_core); 162 int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index, void *record); 163 bool ras_umc_check_retired_addr(struct ras_core_context *ras_core, uint64_t addr); 164 int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core, 165 uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa); 166 #endif 167