/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
/* xref: /linux/drivers/gpu/drm/amd/ras/rascore/ras_aca.h (revision 24f171c7e145f43b9f187578e89b0982ce87e54c) */

25 #ifndef __RAS_ACA_H__
26 #define __RAS_ACA_H__
27 #include "ras.h"
28 
/* Compile-time capacities of the fixed-size arrays declared in this header. */
#define MAX_SOCKET_NUM_PER_HIVE 8	/* entries in aca_block_ecc.socket[] */
#define MAX_AID_NUM_PER_SOCKET 4	/* entries in aca_socket_ecc.aid[] */
#define MAX_XCD_NUM_PER_AID 2		/* entries in aca_aid_ecc.xcd.xcd[] */
#define MAX_ACA_RAS_BLOCK  20		/* entries in ras_aca.aca_blk[] */

/*
 * Single-bit masks, one per error type.  The bit position of each mask is
 * the value of the matching RAS_ERR_TYPE__* constant, which is defined
 * outside this header.
 * NOTE(review): presumably these are the bits stored in
 * aca_block_info.mask - confirm against the users of that field.
 */
#define ACA_ERROR__UE_MASK			(0x1 << RAS_ERR_TYPE__UE)
#define ACA_ERROR__CE_MASK			(0x1 << RAS_ERR_TYPE__CE)
#define ACA_ERROR__DE_MASK			(0x1 << RAS_ERR_TYPE__DE)

/*
 * Named register slots of an ACA bank.
 *
 * ACA_REG_MAX_COUNT is the length of aca_bank_reg.regs[]; the other
 * constants presumably index that array (confirm against the bank parsing
 * code).  Values 7 and 11..15 have no named constant here.  The numbering is
 * explicit, including the gap at 7, so it must not be replaced by implicit
 * enumeration.
 */
enum ras_aca_reg_idx {
	ACA_REG_IDX__CTL		= 0,
	ACA_REG_IDX__STATUS		= 1,
	ACA_REG_IDX__ADDR		= 2,
	ACA_REG_IDX__MISC0		= 3,
	/* Spelling ("CONFG") is kept as-is: it is the identifier users reference. */
	ACA_REG_IDX__CONFG		= 4,
	ACA_REG_IDX__IPID		= 5,
	ACA_REG_IDX__SYND		= 6,
	ACA_REG_IDX__DESTAT		= 8,
	ACA_REG_IDX__DEADDR		= 9,
	ACA_REG_IDX__CTL_MASK		= 10,
	ACA_REG_MAX_COUNT		= 16,
};

/*
 * Forward declarations: the callback and function prototypes below only
 * take pointers to these types, so the full definitions are not needed yet.
 */
struct ras_core_context;
struct aca_block;

55 struct aca_bank_reg {
56 	u32 ecc_type;
57 	u64 seq_no;
58 	u64 regs[ACA_REG_MAX_COUNT];
59 };
60 
/*
 * Hardware IP identifiers used by aca_block_info.hwip.
 *
 * Valid identifiers start at 0 and are contiguous, so ACA_ECC_HWIP_COUNT
 * equals the number of valid identifiers; ACA_ECC_HWIP__UNKNOWN (-1) lies
 * outside that range.
 */
enum aca_ecc_hwip {
	ACA_ECC_HWIP__UNKNOWN = -1,
	ACA_ECC_HWIP__PSP = 0,
	ACA_ECC_HWIP__UMC,
	ACA_ECC_HWIP__SMU,
	ACA_ECC_HWIP__PCS_XGMI,
	ACA_ECC_HWIP_COUNT,
};

/*
 * Location and raw register information describing one reported error.
 *
 * @die_id, @socket_id, @xcd_id: identifiers locating the error.
 *     NOTE(review): presumably these index the aid[] / socket[] / xcd[]
 *     arrays of the aca_*_ecc structures - confirm against the parser.
 * @hwid, @mcatype: hardware id and MCA type of the reporting bank.
 *     NOTE(review): presumably decoded from the IPID register - confirm.
 * @status, @ipid, @addr: 64-bit values.  NOTE(review): presumably the raw
 *     STATUS / IPID / ADDR bank registers - confirm.
 */
struct aca_ecc_info {
	int die_id;
	int socket_id;
	int xcd_id;
	int hwid;
	int mcatype;
	uint64_t status;
	uint64_t ipid;
	uint64_t addr;
};

81 struct aca_bank_ecc {
82 	struct aca_ecc_info bank_info;
83 	u32 ce_count;
84 	u32 ue_count;
85 	u32 de_count;
86 };
87 
88 struct aca_ecc_count {
89 	u32 new_ce_count;
90 	u32 total_ce_count;
91 	u32 new_ue_count;
92 	u32 total_ue_count;
93 	u32 new_de_count;
94 	u32 total_de_count;
95 };
96 
97 struct aca_xcd_ecc {
98 	struct aca_ecc_count ecc_err;
99 };
100 
101 struct aca_aid_ecc {
102 	union {
103 		struct aca_xcd {
104 			struct aca_xcd_ecc xcd[MAX_XCD_NUM_PER_AID];
105 			u32 xcd_num;
106 		} xcd;
107 		struct aca_ecc_count ecc_err;
108 	};
109 };
110 
111 struct aca_socket_ecc {
112 	struct aca_aid_ecc aid[MAX_AID_NUM_PER_SOCKET];
113 	u32 aid_num;
114 };
115 
116 struct aca_block_ecc {
117 	struct aca_socket_ecc socket[MAX_SOCKET_NUM_PER_HIVE];
118 	u32 socket_num_per_hive;
119 };
120 
/*
 * Per-block callbacks for handling a bank.
 *
 * @bank_match: takes the block and an opaque @data pointer, returns bool.
 *     NOTE(review): presumably reports whether the bank in @data belongs to
 *     @aca_blk - confirm against the implementations.
 * @bank_parse: takes the core context, the block, an opaque @data pointer
 *     and an opaque @buf pointer, returns int.
 *     NOTE(review): presumably parses @data into @buf and returns 0 or a
 *     negative error code - confirm.
 *
 * The concrete types behind @data and @buf are not visible in this header.
 */
struct aca_bank_hw_ops {
	bool (*bank_match)(struct aca_block *aca_blk, void *data);
	int (*bank_parse)(struct ras_core_context *ras_core,
			struct aca_block *aca_blk, void *data, void *buf);
};

127 struct aca_block_info {
128 	char name[32];
129 	u32 ras_block_id;
130 	enum aca_ecc_hwip hwip;
131 	struct aca_bank_hw_ops bank_ops;
132 	u32 mask;
133 };
134 
135 struct aca_block {
136 	const struct aca_block_info  *blk_info;
137 	struct aca_block_ecc ecc;
138 };
139 
/*
 * Block table of an ACA IP.
 *
 * @block_num:  NOTE(review): presumably the number of entries in
 *              block_info - confirm.
 * @block_info: array of pointers to read-only block descriptions.
 */
struct ras_aca_ip_func {
	uint32_t block_num;
	const struct aca_block_info **block_info;
};

145 struct ras_aca {
146 	uint32_t aca_ip_version;
147 	const struct ras_aca_ip_func *ip_func;
148 	struct mutex  aca_lock;
149 	struct mutex  bank_op_lock;
150 	struct aca_block aca_blk[MAX_ACA_RAS_BLOCK];
151 	uint32_t ue_updated_mark;
152 };
153 
154 int ras_aca_sw_init(struct ras_core_context *ras_core);
155 int ras_aca_sw_fini(struct ras_core_context *ras_core);
156 int ras_aca_hw_init(struct ras_core_context *ras_core);
157 int ras_aca_hw_fini(struct ras_core_context *ras_core);
158 int ras_aca_get_block_ecc_count(struct ras_core_context *ras_core, u32 blk, void *data);
159 int ras_aca_clear_block_new_ecc_count(struct ras_core_context *ras_core, u32 blk);
160 int ras_aca_clear_all_blocks_ecc_count(struct ras_core_context *ras_core);
161 int ras_aca_update_ecc(struct ras_core_context *ras_core, u32 ecc_type, void *data);
162 void ras_aca_mark_fatal_flag(struct ras_core_context *ras_core);
163 void ras_aca_clear_fatal_flag(struct ras_core_context *ras_core);
164 #endif
165