xref: /linux/drivers/gpu/drm/amd/ras/rascore/ras_umc.h (revision 24f171c7e145f43b9f187578e89b0982ce87e54c)
1 /* SPDX-License-Identifier: MIT */
2 /*
3  * Copyright 2025 Advanced Micro Devices, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef __RAS_UMC_H__
26 #define __RAS_UMC_H__
27 #include "ras.h"
28 #include "ras_eeprom.h"
29 #include "ras_cmd.h"
30 
/* VRAM type codes, numbered 0..13.
 * NOTE(review): presumably the values held in struct ras_umc's
 * umc_vram_type field - confirm against the code that sets it.
 */
31 #define UMC_VRAM_TYPE_UNKNOWN 0
32 #define UMC_VRAM_TYPE_GDDR1   1
33 #define UMC_VRAM_TYPE_DDR2    2
34 #define UMC_VRAM_TYPE_GDDR3   3
35 #define UMC_VRAM_TYPE_GDDR4   4
36 #define UMC_VRAM_TYPE_GDDR5   5
37 #define UMC_VRAM_TYPE_HBM     6
38 #define UMC_VRAM_TYPE_DDR3    7
39 #define UMC_VRAM_TYPE_DDR4    8
40 #define UMC_VRAM_TYPE_GDDR6   9
41 #define UMC_VRAM_TYPE_DDR5    10
42 #define UMC_VRAM_TYPE_LPDDR4  11
43 #define UMC_VRAM_TYPE_LPDDR5  12
44 #define UMC_VRAM_TYPE_HBM3E   13
45 
/* NOTE(review): presumably a tag marking newly detected ECC errors; its
 * users are not visible in this header - confirm.
 */
46 #define UMC_ECC_NEW_DETECTED_TAG       0x1
/* All 64 bits set.
 * NOTE(review): presumably the "no valid page frame number" sentinel
 * (e.g. for 64-bit PFN values such as last_retired_pfn) - confirm.
 */
47 #define UMC_INV_MEM_PFN  (0xFFFFFFFFFFFFFFFF)
48 
49 /* Number of MCA address bits flipped in bad page retirement:
50  * three column bits and one row bit.
51  */
52 #define UMC_PA_FLIP_BITS_NUM 4
53 
/* UMC memory partition (NPS) modes. Each NPSn enumerator has the value n
 * (there is no NPS5 or NPS7). UMC_MEMORY_PARTITION_MODE_UNKNOWN has no
 * initializer, so it evaluates to 9, one more than NPS8.
 */
54 enum umc_memory_partition_mode {
55 	UMC_MEMORY_PARTITION_MODE_NONE = 0,
56 	UMC_MEMORY_PARTITION_MODE_NPS1 = 1,
57 	UMC_MEMORY_PARTITION_MODE_NPS2 = 2,
58 	UMC_MEMORY_PARTITION_MODE_NPS3 = 3,
59 	UMC_MEMORY_PARTITION_MODE_NPS4 = 4,
60 	UMC_MEMORY_PARTITION_MODE_NPS6 = 6,
61 	UMC_MEMORY_PARTITION_MODE_NPS8 = 8,
62 	UMC_MEMORY_PARTITION_MODE_UNKNOWN /* implicit value 9 */
63 };
64 
/* Forward declarations: this header only uses pointers to these types. */
65 struct ras_core_context;
66 struct ras_bank_ecc;
67 
/* Description of the address bits that get flipped.
 * NOTE(review): the per-field notes below are inferred from the field
 * names only - confirm against the code that fills this struct.
 */
68 struct umc_flip_bits {
69 	uint32_t flip_bits_in_pa[UMC_PA_FLIP_BITS_NUM]; /* one slot per flipped bit; presumably bit positions in the PA */
70 	uint32_t flip_row_bit; /* presumably the row bit that is flipped */
71 	uint32_t r13_in_pa; /* presumably the position of row bit 13 in the PA */
72 	uint32_t bit_num; /* presumably the number of bits described */
73 };
74 
/* MCA-side error address; passed as the `in` argument of
 * ras_umc_psp_convert_ma_to_pa().
 * NOTE(review): the per-field notes are inferred from names - confirm.
 */
75 struct umc_mca_addr {
76 	uint64_t err_addr; /* 64-bit error address */
77 	uint32_t ch_inst; /* presumably the channel instance */
78 	uint32_t umc_inst; /* presumably the UMC instance */
79 	uint32_t node_inst; /* presumably the node instance */
80 	uint32_t socket_id; /* presumably the socket the error came from */
81 };
82 
/* Physical-address form of an error location; used as the `out` argument
 * of ras_umc_psp_convert_ma_to_pa() and as the `cur_nps_addr` argument of
 * ras_umc_fill_eeprom_record().
 * NOTE(review): the per-field notes are inferred from names - confirm.
 */
83 struct umc_phy_addr {
84 	uint64_t pa; /* 64-bit physical address */
85 	uint32_t bank; /* presumably the memory bank */
86 	uint32_t channel_idx; /* presumably the channel index */
87 };
88 
/* Bank-level memory address. It is the bank side of the bank_to_soc_pa /
 * soc_pa_to_bank callbacks in struct ras_umc_ip_func and of
 * ras_umc_translate_soc_pa_and_bank().
 */
89 struct umc_bank_addr {
90 	uint32_t stack_id; /* SID */
91 	uint32_t bank_group;
92 	uint32_t bank;
93 	uint32_t row;
94 	uint32_t column;
95 	uint32_t channel;
96 	uint32_t subchannel; /* Also called Pseudochannel (PC) */
97 };
98 
/* Table of UMC callbacks; struct ras_umc refers to one through its const
 * ip_func pointer. Every callback takes the ras_core_context first and
 * returns int.
 * NOTE(review): the per-callback notes are inferred from names and
 * signatures, and the return convention (presumably 0 on success) is not
 * visible here - confirm against the implementations.
 */
99 struct ras_umc_ip_func {
	/* presumably fills *record from the ECC bank info in *bank */
100 	int (*bank_to_eeprom_record)(struct ras_core_context *ras_core,
101 			struct ras_bank_ecc *bank, struct eeprom_umc_record *record);
	/* presumably converts *record in place for the given nps mode */
102 	int (*eeprom_record_to_nps_record)(struct ras_core_context *ras_core,
103 			struct eeprom_umc_record *record, uint32_t nps);
	/* presumably writes the PFNs covered by *record into pfns[];
	 * num is presumably the capacity of pfns[]
	 */
104 	int (*eeprom_record_to_nps_pages)(struct ras_core_context *ras_core,
105 			struct eeprom_umc_record *record, uint32_t nps,
106 			uint64_t *pfns, uint32_t num);
	/* bank_addr is passed by value; result presumably stored in *soc_pa */
107 	int (*bank_to_soc_pa)(struct ras_core_context *ras_core,
108 			struct umc_bank_addr bank_addr, uint64_t *soc_pa);
	/* soc_pa is passed by value; result presumably stored in *bank_addr */
109 	int (*soc_pa_to_bank)(struct ras_core_context *ras_core,
110 			uint64_t soc_pa, struct umc_bank_addr *bank_addr);
111 };
112 
/* A store of eeprom_umc_record entries: the record array plus its current
 * entry count and remaining capacity. struct ras_umc_err_data embeds two
 * of these (rom_data and ram_data).
 */
113 struct eeprom_store_record {
114 	/* points to the array of data records */
115 	struct eeprom_umc_record *bps;
116 	/* number of entries currently in the array */
117 	int count;
118 	/* remaining room for new entries */
119 	int space_left;
120 };
121 
/* UMC error bookkeeping, embedded in struct ras_umc as umc_err_data.
 * NOTE(review): the per-field notes are inferred from names - confirm.
 */
122 struct ras_umc_err_data {
123 	struct eeprom_store_record rom_data; /* presumably the records as stored in the EEPROM */
124 	struct eeprom_store_record ram_data; /* presumably the in-memory record set */
125 	enum umc_memory_partition_mode umc_nps_mode; /* presumably the current NPS mode */
126 	uint64_t last_retired_pfn; /* presumably the PFN of the most recently retired page */
127 };
128 
/* UMC state: IP version and VRAM type, the IP callback table, a radix
 * tree, four mutexes, the error bookkeeping and a pending-ECC list.
 * NOTE(review): which data each mutex protects is not visible in this
 * header; the pairings noted below are guesses from the names - confirm.
 */
129 struct ras_umc {
130 	u32 umc_ip_version;
131 	u32 umc_vram_type; /* presumably one of UMC_VRAM_TYPE_* */
132 	const struct ras_umc_ip_func *ip_func; /* read-only callback table */
133 	struct radix_tree_root root;
134 	struct mutex  tree_lock; /* presumably guards root */
135 	struct mutex  umc_lock;
136 	struct mutex  bank_log_lock;
137 	struct mutex  pending_ecc_lock; /* presumably guards pending_ecc_list */
138 	struct ras_umc_err_data umc_err_data;
139 	struct list_head pending_ecc_list;
140 };
141 
/* UMC RAS entry points. Each takes a struct ras_core_context * as its
 * first argument; that parameter is spelled `ras` in some prototypes and
 * `ras_core` in others.
 * NOTE(review): return conventions are not visible in this header -
 * presumably 0 on success and a negative error code otherwise; confirm.
 */
141 int ras_umc_sw_init(struct ras_core_context *ras);
142 int ras_umc_sw_fini(struct ras_core_context *ras);
143 int ras_umc_hw_init(struct ras_core_context *ras);
144 int ras_umc_hw_fini(struct ras_core_context *ras);
/* Takes an MCA address in *in and a physical address in *out for the
 * given nps. NOTE(review): presumably converts *in to *out, and the name
 * suggests the PSP does the conversion - confirm.
 */
145 int ras_umc_psp_convert_ma_to_pa(struct ras_core_context *ras_core,
146 		struct umc_mca_addr *in, struct umc_phy_addr *out,
147 		uint32_t nps);
/* data is untyped here; its expected type is not visible in this header. */
148 int ras_umc_handle_bad_pages(struct ras_core_context *ras_core, void *data);
149 int ras_umc_log_bad_bank(struct ras_core_context *ras, struct ras_bank_ecc *bank);
/* Two similarly named functions: ras_umc_log_bad_bank_pending() takes a
 * bank, ras_umc_log_pending_bad_bank() takes only the context.
 * NOTE(review): presumably the first queues a bank as pending and the
 * second logs the queued banks - confirm.
 */
150 int ras_umc_log_bad_bank_pending(struct ras_core_context *ras_core, struct ras_bank_ecc *bank);
151 int ras_umc_log_pending_bad_bank(struct ras_core_context *ras_core);
152 int ras_umc_clear_logged_ecc(struct ras_core_context *ras_core);
153 int ras_umc_load_bad_pages(struct ras_core_context *ras_core);
154 int ras_umc_get_saved_eeprom_count(struct ras_core_context *ras_core);
155 int ras_umc_clean_badpage_data(struct ras_core_context *ras_core);
/* NOTE(review): presumably fills *record from the other arguments -
 * confirm.
 */
156 int ras_umc_fill_eeprom_record(struct ras_core_context *ras_core,
157 		uint64_t err_addr, uint32_t umc_inst, struct umc_phy_addr *cur_nps_addr,
158 		enum umc_memory_partition_mode cur_nps, struct eeprom_umc_record *record);
159 
160 int ras_umc_get_badpage_count(struct ras_core_context *ras_core);
/* record is untyped here; its expected type is not visible in this header. */
161 int ras_umc_get_badpage_record(struct ras_core_context *ras_core, uint32_t index, void *record);
162 bool ras_umc_check_retired_addr(struct ras_core_context *ras_core, uint64_t addr);
/* Both soc_pa and bank_addr are pointers. NOTE(review): bank_to_pa
 * presumably selects the direction (true: bank -> SoC PA, false: SoC PA
 * -> bank) - confirm.
 */
163 int ras_umc_translate_soc_pa_and_bank(struct ras_core_context *ras_core,
164 			uint64_t *soc_pa, struct umc_bank_addr *bank_addr, bool bank_to_pa);
166 #endif
167