1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2025 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 #include "ras.h" 25 #include "ras_core_status.h" 26 #include "ras_log_ring.h" 27 28 #define RAS_LOG_MAX_QUERY_SIZE 0xC000 29 #define RAS_LOG_MEM_TEMP_SIZE 0x200 30 #define RAS_LOG_MEMPOOL_SIZE \ 31 (RAS_LOG_MAX_QUERY_SIZE + RAS_LOG_MEM_TEMP_SIZE) 32 33 #define BATCH_IDX_TO_TREE_IDX(batch_idx, sn) (((batch_idx) << 8) | (sn)) 34 35 static const uint64_t ras_rma_aca_reg[ACA_REG_MAX_COUNT] = { 36 [ACA_REG_IDX__CTL] = 0x1, 37 [ACA_REG_IDX__STATUS] = 0xB000000000000137, 38 [ACA_REG_IDX__ADDR] = 0x0, 39 [ACA_REG_IDX__MISC0] = 0x0, 40 [ACA_REG_IDX__CONFG] = 0x1ff00000002, 41 [ACA_REG_IDX__IPID] = 0x9600000000, 42 [ACA_REG_IDX__SYND] = 0x0, 43 }; 44 45 static uint64_t ras_log_ring_get_logged_ecc_count(struct ras_core_context *ras_core) 46 { 47 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 48 uint64_t count = 0; 49 50 if (log_ring->logged_ecc_count < 0) { 51 RAS_DEV_WARN(ras_core->dev, 52 "Error: the logged ras count should not less than 0!\n"); 53 count = 0; 54 } else { 55 count = log_ring->logged_ecc_count; 56 } 57 58 if (count > RAS_LOG_MEMPOOL_SIZE) 59 RAS_DEV_WARN(ras_core->dev, 60 "Error: the logged ras count is out of range!\n"); 61 62 return count; 63 } 64 65 static int ras_log_ring_add_data(struct ras_core_context *ras_core, 66 struct ras_log_info *log, struct ras_log_batch_tag *batch_tag) 67 { 68 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 69 unsigned long flags = 0; 70 int ret = 0; 71 72 if (batch_tag && (batch_tag->sub_seqno >= MAX_RECORD_PER_BATCH)) { 73 RAS_DEV_ERR(ras_core->dev, 74 "Invalid batch sub seqno:%d, batch:0x%llx\n", 75 batch_tag->sub_seqno, batch_tag->batch_id); 76 return -EINVAL; 77 } 78 79 spin_lock_irqsave(&log_ring->spin_lock, flags); 80 if (batch_tag) { 81 log->seqno = 82 BATCH_IDX_TO_TREE_IDX(batch_tag->batch_id, batch_tag->sub_seqno); 83 batch_tag->sub_seqno++; 84 } else { 85 log->seqno = BATCH_IDX_TO_TREE_IDX(log_ring->mono_upward_batch_id, 0); 86 log_ring->mono_upward_batch_id++; 87 } 88 ret = radix_tree_insert(&log_ring->ras_log_root, log->seqno, log); 89 if (!ret) 90 log_ring->logged_ecc_count++; 91 spin_unlock_irqrestore(&log_ring->spin_lock, flags); 92 93 if (ret) { 94 RAS_DEV_ERR(ras_core->dev, 95 "Failed to add ras log! seqno:0x%llx, ret:%d\n", 96 log->seqno, ret); 97 mempool_free(log, log_ring->ras_log_mempool); 98 } 99 100 return ret; 101 } 102 103 static int ras_log_ring_delete_data(struct ras_core_context *ras_core, uint32_t count) 104 { 105 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 106 unsigned long flags = 0; 107 uint32_t i = 0, j = 0; 108 uint64_t batch_id, idx; 109 void *data; 110 int ret = -ENODATA; 111 112 if (count > ras_log_ring_get_logged_ecc_count(ras_core)) 113 return -EINVAL; 114 115 spin_lock_irqsave(&log_ring->spin_lock, flags); 116 batch_id = log_ring->last_del_batch_id; 117 while (batch_id < log_ring->mono_upward_batch_id) { 118 for (j = 0; j < MAX_RECORD_PER_BATCH; j++) { 119 idx = BATCH_IDX_TO_TREE_IDX(batch_id, j); 120 data = radix_tree_delete(&log_ring->ras_log_root, idx); 121 if (data) { 122 mempool_free(data, log_ring->ras_log_mempool); 123 log_ring->logged_ecc_count--; 124 i++; 125 } 126 } 127 batch_id = ++log_ring->last_del_batch_id; 128 if (i >= count) { 129 ret = 0; 130 break; 131 } 132 } 133 spin_unlock_irqrestore(&log_ring->spin_lock, flags); 134 135 return ret; 136 } 137 138 static void ras_log_ring_clear_log_tree(struct ras_core_context *ras_core) 139 { 140 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 141 uint64_t batch_id, idx; 142 unsigned long flags = 0; 143 void *data; 144 int j; 145 146 if ((log_ring->mono_upward_batch_id <= log_ring->last_del_batch_id) && 147 !log_ring->logged_ecc_count) 148 return; 149 150 spin_lock_irqsave(&log_ring->spin_lock, flags); 151 batch_id = log_ring->last_del_batch_id; 152 while (batch_id < log_ring->mono_upward_batch_id) { 153 for (j = 0; j < MAX_RECORD_PER_BATCH; j++) { 154 idx = BATCH_IDX_TO_TREE_IDX(batch_id, j); 155 data = radix_tree_delete(&log_ring->ras_log_root, idx); 156 if (data) { 157 mempool_free(data, log_ring->ras_log_mempool); 158 log_ring->logged_ecc_count--; 159 } 160 } 161 batch_id++; 162 } 163 spin_unlock_irqrestore(&log_ring->spin_lock, flags); 164 165 } 166 167 int ras_log_ring_sw_init(struct ras_core_context *ras_core) 168 { 169 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 170 171 memset(log_ring, 0, sizeof(*log_ring)); 172 173 log_ring->ras_log_mempool = mempool_create_kmalloc_pool( 174 RAS_LOG_MEMPOOL_SIZE, sizeof(struct ras_log_info)); 175 if (!log_ring->ras_log_mempool) 176 return -ENOMEM; 177 178 INIT_RADIX_TREE(&log_ring->ras_log_root, GFP_KERNEL); 179 180 spin_lock_init(&log_ring->spin_lock); 181 182 return 0; 183 } 184 185 int ras_log_ring_sw_fini(struct ras_core_context *ras_core) 186 { 187 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 188 189 ras_log_ring_clear_log_tree(ras_core); 190 log_ring->logged_ecc_count = 0; 191 log_ring->last_del_batch_id = 0; 192 log_ring->mono_upward_batch_id = 0; 193 194 mempool_destroy(log_ring->ras_log_mempool); 195 196 return 0; 197 } 198 199 struct ras_log_batch_tag *ras_log_ring_create_batch_tag(struct ras_core_context *ras_core) 200 { 201 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 202 struct ras_log_batch_tag *batch_tag; 203 unsigned long flags = 0; 204 205 batch_tag = kzalloc_obj(*batch_tag); 206 if (!batch_tag) 207 return NULL; 208 209 spin_lock_irqsave(&log_ring->spin_lock, flags); 210 batch_tag->batch_id = log_ring->mono_upward_batch_id; 211 log_ring->mono_upward_batch_id++; 212 spin_unlock_irqrestore(&log_ring->spin_lock, flags); 213 214 batch_tag->sub_seqno = 0; 215 batch_tag->timestamp = ras_core_get_utc_second_timestamp(ras_core); 216 return batch_tag; 217 } 218 219 void ras_log_ring_destroy_batch_tag(struct ras_core_context *ras_core, 220 struct ras_log_batch_tag *batch_tag) 221 { 222 kfree(batch_tag); 223 } 224 225 void ras_log_ring_add_log_event(struct ras_core_context *ras_core, 226 enum ras_log_event event, void *data, struct ras_log_batch_tag *batch_tag) 227 { 228 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 229 struct device_system_info dev_info = {0}; 230 struct ras_log_info *log; 231 uint64_t socket_id; 232 void *obj; 233 234 obj = mempool_alloc_preallocated(log_ring->ras_log_mempool); 235 if (!obj || 236 (ras_log_ring_get_logged_ecc_count(ras_core) >= RAS_LOG_MEMPOOL_SIZE)) { 237 ras_log_ring_delete_data(ras_core, RAS_LOG_MEM_TEMP_SIZE); 238 if (!obj) 239 obj = mempool_alloc_preallocated(log_ring->ras_log_mempool); 240 } 241 242 if (!obj) { 243 RAS_DEV_ERR(ras_core->dev, "ERROR: Failed to alloc ras log buffer!\n"); 244 return; 245 } 246 247 log = (struct ras_log_info *)obj; 248 249 memset(log, 0, sizeof(*log)); 250 log->timestamp = 251 batch_tag ? batch_tag->timestamp : ras_core_get_utc_second_timestamp(ras_core); 252 log->event = event; 253 254 if (data) 255 memcpy(&log->aca_reg, data, sizeof(log->aca_reg)); 256 257 if (event == RAS_LOG_EVENT_RMA) { 258 memcpy(&log->aca_reg, ras_rma_aca_reg, sizeof(log->aca_reg)); 259 ras_core_get_device_system_info(ras_core, &dev_info); 260 socket_id = dev_info.socket_id; 261 log->aca_reg.regs[ACA_REG_IDX__IPID] |= ((socket_id / 4) & 0x01); 262 log->aca_reg.regs[ACA_REG_IDX__IPID] |= (((socket_id % 4) & 0x3) << 44); 263 } 264 265 ras_log_ring_add_data(ras_core, log, batch_tag); 266 } 267 268 static int ras_log_ring_lookup_data(struct ras_core_context *ras_core, 269 uint64_t idx, struct ras_log_info *log) 270 { 271 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 272 unsigned long flags = 0; 273 void *data; 274 275 spin_lock_irqsave(&log_ring->spin_lock, flags); 276 data = radix_tree_lookup(&log_ring->ras_log_root, idx); 277 if (data) 278 memcpy(log, data, sizeof(*log)); 279 spin_unlock_irqrestore(&log_ring->spin_lock, flags); 280 281 return data ? 0 : -ENODATA; 282 } 283 284 int ras_log_ring_get_batch_records(struct ras_core_context *ras_core, uint64_t batch_id, 285 struct ras_log_info *log_arr, uint32_t arr_num) 286 { 287 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 288 uint32_t i, idx, count = 0; 289 290 if (!log_arr || !arr_num || (batch_id >= log_ring->mono_upward_batch_id) || 291 (batch_id < log_ring->last_del_batch_id)) 292 return -EINVAL; 293 294 for (i = 0; i < MAX_RECORD_PER_BATCH && i < arr_num; i++) { 295 idx = BATCH_IDX_TO_TREE_IDX(batch_id, i); 296 if (!ras_log_ring_lookup_data(ras_core, idx, &log_arr[count])) 297 count++; 298 } 299 300 return count; 301 } 302 303 int ras_log_ring_get_batch_overview(struct ras_core_context *ras_core, 304 struct ras_log_batch_overview *overview) 305 { 306 struct ras_log_ring *log_ring = &ras_core->ras_log_ring; 307 308 overview->logged_batch_count = 309 log_ring->mono_upward_batch_id - log_ring->last_del_batch_id; 310 overview->last_batch_id = log_ring->mono_upward_batch_id; 311 overview->first_batch_id = log_ring->last_del_batch_id; 312 313 return 0; 314 } 315