// SPDX-License-Identifier: MIT
/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/list.h>
#include "amdgpu.h"
#include "amdgpu_aca.h"
#include "amdgpu_cper.h"

static const guid_t MCE = CPER_NOTIFY_MCE;
static const guid_t CMC = CPER_NOTIFY_CMC;
static const guid_t BOOT = BOOT_TYPE;

static const guid_t CRASHDUMP = AMD_CRASHDUMP;
static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR;

#define CPER_SIGNATURE_SZ (sizeof(((struct cper_hdr *)0)->signature))

static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size)
{
	hdr->record_length += size;
}

static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp)
{
	struct tm tm;
	time64_t now = ktime_get_real_seconds();

	time64_to_tm(now, 0, &tm);
	timestamp->seconds = tm.tm_sec;
	timestamp->minutes = tm.tm_min;
	timestamp->hours = tm.tm_hour;
	timestamp->flag = 0;
	timestamp->day = tm.tm_mday;
	timestamp->month = 1 + tm.tm_mon;
	timestamp->year = (1900 + tm.tm_year) % 100;
	timestamp->century = (1900 + tm.tm_year) / 100;
}

void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev,
				struct cper_hdr *hdr,
				enum amdgpu_cper_type type,
				enum cper_error_severity sev)
{
	char record_id[16];

	hdr->signature[0] = 'C';
	hdr->signature[1] = 'P';
	hdr->signature[2] = 'E';
	hdr->signature[3] = 'R';
	hdr->revision = CPER_HDR_REV_1;
	hdr->signature_end = 0xFFFFFFFF;
	hdr->error_severity = sev;

	hdr->valid_bits.platform_id = 1;
	hdr->valid_bits.timestamp = 1;

	amdgpu_cper_get_timestamp(&hdr->timestamp);
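
	/*
	 * Record ID is "<socket>:<hex counter>", truncated to the 8-byte
	 * record_id field; the per-device atomic counter keeps IDs unique
	 * across records.
	 */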
	snprintf(record_id, 9, "%d:%X",
		 (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
			 adev->smuio.funcs->get_socket_id(adev) :
			 0,
		 atomic_inc_return(&adev->cper.unique_id));
	memcpy(hdr->record_id, record_id, 8);

	snprintf(hdr->platform_id, 16, "0x%04X:0x%04X",
		 adev->pdev->vendor, adev->pdev->device);
	/* pmfw version should be part of creator_id according to CPER spec */
	snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU);

	switch (type) {
	case AMDGPU_CPER_TYPE_BOOT:
		hdr->notify_type = BOOT;
		break;
	case AMDGPU_CPER_TYPE_FATAL:
	case AMDGPU_CPER_TYPE_BP_THRESHOLD:
		hdr->notify_type = MCE;
		break;
	case AMDGPU_CPER_TYPE_RUNTIME:
		if (sev == CPER_SEV_NON_FATAL_CORRECTED)
			hdr->notify_type = CMC;
		else
			hdr->notify_type = MCE;
		break;
	default:
		dev_err(adev->dev, "Unknown CPER Type\n");
		break;
	}

	__inc_entry_length(hdr, HDR_LEN);
}

static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev,
					       struct cper_sec_desc *section_desc,
					       bool bp_threshold,
					       bool poison,
					       enum cper_error_severity sev,
					       guid_t sec_type,
					       uint32_t section_length,
					       uint32_t section_offset)
{
	section_desc->revision_minor = CPER_SEC_MINOR_REV_1;
	section_desc->revision_major = CPER_SEC_MAJOR_REV_22;
	section_desc->sec_offset = section_offset;
	section_desc->sec_length = section_length;
	section_desc->valid_bits.fru_text = 1;
	section_desc->flag_bits.primary = 1;
	section_desc->severity = sev;
	section_desc->sec_type = sec_type;

	snprintf(section_desc->fru_text, 20, "OAM%d",
		 (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
			 adev->smuio.funcs->get_socket_id(adev) :
			 0);

	if (bp_threshold)
		section_desc->flag_bits.exceed_err_threshold = 1;
	if (poison)
		section_desc->flag_bits.latent_err = 1;

	return 0;
}

int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev,
					 struct cper_hdr *hdr,
					 uint32_t idx,
					 struct cper_sec_crashdump_reg_data reg_data)
{
	struct cper_sec_desc *section_desc;
	struct cper_sec_crashdump_fatal *section;

	section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
	section = (struct cper_sec_crashdump_fatal *)((uint8_t *)hdr +
		  FATAL_SEC_OFFSET(hdr->sec_cnt, idx));

	amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, false,
					    CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN,
					    FATAL_SEC_OFFSET(hdr->sec_cnt, idx));

	section->body.reg_ctx_type = CPER_CTX_TYPE_CRASH;
	section->body.reg_arr_size = sizeof(reg_data);
	section->body.data = reg_data;

	__inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN);

	return 0;
}

int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev,
					   struct cper_hdr *hdr,
					   uint32_t idx,
					   enum cper_error_severity sev,
					   uint32_t *reg_dump,
					   uint32_t reg_count)
{
	struct cper_sec_desc *section_desc;
	struct cper_sec_nonstd_err *section;
	bool poison;

	poison = sev != CPER_SEV_NON_FATAL_CORRECTED;
	section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
	section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
		  NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, poison,
					    sev, RUNTIME, NONSTD_SEC_LEN,
					    NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	reg_count = umin(reg_count, CPER_ACA_REG_COUNT);

	section->hdr.valid_bits.err_info_cnt = 1;
	section->hdr.valid_bits.err_context_cnt = 1;
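
	/*
	 * Advertise one error-info and one register-context entry per
	 * section; the context below carries the raw ACA bank registers
	 * as a crash-dump style register array.
	 */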
	section->info.error_type = RUNTIME;
	section->info.ms_chk_bits.err_type_valid = 1;
	section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
	section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);

	memcpy(section->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t));

	__inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);

	return 0;
}

int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev,
						      struct cper_hdr *hdr,
						      uint32_t idx)
{
	struct cper_sec_desc *section_desc;
	struct cper_sec_nonstd_err *section;
	uint32_t socket_id;

	section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx));
	section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr +
		  NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false,
					    CPER_SEV_FATAL, RUNTIME, NONSTD_SEC_LEN,
					    NONSTD_SEC_OFFSET(hdr->sec_cnt, idx));

	section->hdr.valid_bits.err_info_cnt = 1;
	section->hdr.valid_bits.err_context_cnt = 1;

	section->info.error_type = RUNTIME;
	section->info.valid_bits.ms_chk = 1;
	section->info.ms_chk_bits.err_type_valid = 1;
	section->info.ms_chk_bits.err_type = 1;
	section->info.ms_chk_bits.pcc = 1;
	section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH;
	section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump);

	/* Hardcoded Reg dump for bad page threshold CPER */
	socket_id = (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ?
			    adev->smuio.funcs->get_socket_id(adev) :
			    0;
	section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1;
	section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137;
	section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000;
	section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2;
	section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff;
	section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = (socket_id / 4) & 0x01;
	section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x096 | (((socket_id % 4) & 0x3) << 12);
	section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0;
	section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0;

	__inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN);

	return 0;
}

struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev,
					 enum amdgpu_cper_type type,
					 uint16_t section_count)
{
	struct cper_hdr *hdr;
	uint32_t size = 0;

	size += HDR_LEN;
	size += (SEC_DESC_LEN * section_count);

	switch (type) {
	case AMDGPU_CPER_TYPE_RUNTIME:
	case AMDGPU_CPER_TYPE_BP_THRESHOLD:
		size += (NONSTD_SEC_LEN * section_count);
		break;
	case AMDGPU_CPER_TYPE_FATAL:
		size += (FATAL_SEC_LEN * section_count);
		break;
	case AMDGPU_CPER_TYPE_BOOT:
		size += (BOOT_SEC_LEN * section_count);
		break;
	default:
		dev_err(adev->dev, "Unknown CPER Type!\n");
		return NULL;
	}

	hdr = kzalloc(size, GFP_KERNEL);
	if (!hdr)
		return NULL;

	/* Save this early */
	hdr->sec_cnt = section_count;

	return hdr;
}
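
/*
 * Generate a fatal (UE) record: a single crashdump section carrying the raw
 * STATUS/ADDR/IPID/SYND registers of the faulting ACA bank.
 */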
int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev,
				   struct aca_bank *bank)
{
	struct cper_hdr *fatal = NULL;
	struct cper_sec_crashdump_reg_data reg_data = { 0 };
	struct amdgpu_ring *ring = &adev->cper.ring_buf;
	int ret;

	fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
	if (!fatal) {
		dev_err(adev->dev, "failed to alloc cper entry for ue record\n");
		return -ENOMEM;
	}

	reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
	reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
	reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
	reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
	reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
	reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
	reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
	reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);

	amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL);
	ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data);
	if (ret) {
		kfree(fatal);
		return ret;
	}

	amdgpu_cper_ring_write(ring, fatal, fatal->record_length);
	kfree(fatal);

	return 0;
}

int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev)
{
	struct cper_hdr *bp_threshold = NULL;
	struct amdgpu_ring *ring = &adev->cper.ring_buf;
	int ret;

	bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1);
	if (!bp_threshold) {
		dev_err(adev->dev, "failed to alloc cper entry for bad page threshold record\n");
		return -ENOMEM;
	}

	amdgpu_cper_entry_fill_hdr(adev, bp_threshold,
				   AMDGPU_CPER_TYPE_BP_THRESHOLD,
				   CPER_SEV_FATAL);
	ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0);
	if (ret) {
		kfree(bp_threshold);
		return ret;
	}

	amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length);
	kfree(bp_threshold);

	return 0;
}

static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev,
								enum aca_error_type aca_err_type)
{
	switch (aca_err_type) {
	case ACA_ERROR_TYPE_UE:
		return CPER_SEV_FATAL;
	case ACA_ERROR_TYPE_CE:
		return CPER_SEV_NON_FATAL_CORRECTED;
	case ACA_ERROR_TYPE_DEFERRED:
		return CPER_SEV_NON_FATAL_UNCORRECTED;
	default:
		dev_err(adev->dev, "Unknown ACA error type!\n");
		return CPER_SEV_FATAL;
	}
}

int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev,
				    struct aca_banks *banks,
				    uint16_t bank_count)
{
	struct cper_hdr *corrected = NULL;
	enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED;
	struct amdgpu_ring *ring = &adev->cper.ring_buf;
	uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 };
	struct aca_bank_node *node;
	struct aca_bank *bank;
	uint32_t i = 0;
	int ret;

	corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count);
	if (!corrected) {
		dev_err(adev->dev, "failed to allocate cper entry for ce records\n");
		return -ENOMEM;
	}

	/* Raise severity if any DE is detected in the ACA bank list */
	list_for_each_entry(node, &banks->list, node) {
		bank = &node->bank;
		if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) {
			sev = CPER_SEV_NON_FATAL_UNCORRECTED;
			break;
		}
	}

	amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev);
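
	/*
	 * The header severity above is the worst case across all banks; each
	 * section below still records its own per-bank severity.
	 */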
	/* Combine CE and DE in cper record */
	list_for_each_entry(node, &banks->list, node) {
		bank = &node->bank;
		reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]);
		reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]);
		reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
		reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]);
		reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
		reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]);
		reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
		reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]);
		reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
		reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]);
		reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]);
		reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]);
		reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]);
		reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]);

		ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++,
				amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type),
				reg_data, CPER_ACA_REG_COUNT);
		if (ret) {
			kfree(corrected);
			return ret;
		}
	}

	amdgpu_cper_ring_write(ring, corrected, corrected->record_length);
	kfree(corrected);

	return 0;
}

static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos)
{
	char signature[CPER_SIGNATURE_SZ];

	if ((pos << 2) >= ring->ring_size)
		return false;

	if ((pos << 2) + CPER_SIGNATURE_SZ <= ring->ring_size) {
		memcpy(signature, &ring->ring[pos], CPER_SIGNATURE_SZ);
	} else {
		u32 chunk = ring->ring_size - (pos << 2);

		memcpy(signature, &ring->ring[pos], chunk);
		memcpy(signature + chunk, ring->ring, CPER_SIGNATURE_SZ - chunk);
	}

	return !memcmp(signature, "CPER", CPER_SIGNATURE_SZ);
}

static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos)
{
	struct cper_hdr chdr;
	u64 p;
	u32 chunk, rec_len = 0;

	chunk = ring->ring_size - (pos << 2);

	if (amdgpu_cper_is_hdr(ring, pos)) {
		if (chunk >= sizeof(chdr)) {
			memcpy(&chdr, &ring->ring[pos], sizeof(chdr));
		} else {
			memcpy(&chdr, &ring->ring[pos], chunk);
			memcpy((u8 *)&chdr + chunk, ring->ring, sizeof(chdr) - chunk);
		}

		rec_len = chdr.record_length;
		goto calc;
	}

	/* ring buffer is not full, no cper data after ring->wptr */
	if (ring->count_dw)
		goto calc;

	for (p = pos + 1; p <= ring->buf_mask; p++) {
		if (amdgpu_cper_is_hdr(ring, p)) {
			rec_len = (p - pos) << 2;
			goto calc;
		}
	}

calc:
	if (!rec_len)
		return chunk;
	else
		return umin(rec_len, chunk);
}

void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count)
{
	u64 pos, wptr_old, rptr;
	int rec_cnt_dw = count >> 2;
	u32 chunk, ent_sz;
	u8 *s = (u8 *)src;

	if (count >= ring->ring_size - 4) {
		dev_err(ring->adev->dev,
			"CPER data size(%d) is larger than ring size(%d)\n",
			count, ring->ring_size - 4);

		return;
	}

	mutex_lock(&ring->adev->cper.ring_lock);

	wptr_old = ring->wptr;
	rptr = *ring->rptr_cpu_addr & ring->ptr_mask;

	while (count) {
		ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr);
		chunk = umin(ent_sz, count);
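
		/*
		 * get_ent_sz() caps each chunk at the end of the ring buffer,
		 * so this memcpy never wraps; wptr wraps on the next pass.
		 */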
		memcpy(&ring->ring[ring->wptr], s, chunk);

		ring->wptr += (chunk >> 2);
		ring->wptr &= ring->ptr_mask;
		count -= chunk;
		s += chunk;
	}

	if (ring->count_dw < rec_cnt_dw)
		ring->count_dw = 0;

	/* the buffer overflowed, adjust rptr */
	if (((wptr_old < rptr) && (rptr <= ring->wptr)) ||
	    ((ring->wptr < wptr_old) && (wptr_old < rptr)) ||
	    ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) {
		pos = (ring->wptr + 1) & ring->ptr_mask;

		do {
			ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos);

			rptr += (ent_sz >> 2);
			rptr &= ring->ptr_mask;
			*ring->rptr_cpu_addr = rptr;

			pos = rptr;
		} while (!amdgpu_cper_is_hdr(ring, rptr));
	}

	if (ring->count_dw >= rec_cnt_dw)
		ring->count_dw -= rec_cnt_dw;
	mutex_unlock(&ring->adev->cper.ring_lock);
}

static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring)
{
	return *(ring->rptr_cpu_addr);
}

static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring)
{
	return ring->wptr;
}

static const struct amdgpu_ring_funcs cper_ring_funcs = {
	.type = AMDGPU_RING_TYPE_CPER,
	.align_mask = 0xff,
	.support_64bit_ptrs = false,
	.get_rptr = amdgpu_cper_ring_get_rptr,
	.get_wptr = amdgpu_cper_ring_get_wptr,
};

static int amdgpu_cper_ring_init(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &(adev->cper.ring_buf);

	mutex_init(&adev->cper.ring_lock);

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = false;
	ring->no_scheduler = true;
	ring->funcs = &cper_ring_funcs;

	sprintf(ring->name, "cper");
	return amdgpu_ring_init(adev, ring, CPER_MAX_RING_SIZE, NULL, 0,
				AMDGPU_RING_PRIO_DEFAULT, NULL);
}

int amdgpu_cper_init(struct amdgpu_device *adev)
{
	int r;

	if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_ras_cper_en(adev))
		return 0;
	else if (!amdgpu_sriov_vf(adev) && !amdgpu_uniras_enabled(adev) &&
		 !amdgpu_aca_is_enabled(adev))
		return 0;

	r = amdgpu_cper_ring_init(adev);
	if (r) {
		dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", r);
		return r;
	}

	mutex_init(&adev->cper.cper_lock);

	adev->cper.enabled = true;
	adev->cper.max_count = CPER_MAX_ALLOWED_COUNT;

	return 0;
}

int amdgpu_cper_fini(struct amdgpu_device *adev)
{
	if (!amdgpu_aca_is_enabled(adev) && !amdgpu_sriov_ras_cper_en(adev))
		return 0;

	adev->cper.enabled = false;

	amdgpu_ring_fini(&(adev->cper.ring_buf));
	adev->cper.count = 0;
	adev->cper.wptr = 0;

	return 0;
}