192d5d2a0SHawking Zhang /* SPDX-License-Identifier: GPL-2.0 */ 292d5d2a0SHawking Zhang /* 392d5d2a0SHawking Zhang * Copyright 2025 Advanced Micro Devices, Inc. 492d5d2a0SHawking Zhang * 592d5d2a0SHawking Zhang * Permission is hereby granted, free of charge, to any person obtaining a 692d5d2a0SHawking Zhang * copy of this software and associated documentation files (the "Software"), 792d5d2a0SHawking Zhang * to deal in the Software without restriction, including without limitation 892d5d2a0SHawking Zhang * the rights to use, copy, modify, merge, publish, distribute, sublicense, 992d5d2a0SHawking Zhang * and/or sell copies of the Software, and to permit persons to whom the 1092d5d2a0SHawking Zhang * Software is furnished to do so, subject to the following conditions: 1192d5d2a0SHawking Zhang * 1292d5d2a0SHawking Zhang * The above copyright notice and this permission notice shall be included in 1392d5d2a0SHawking Zhang * all copies or substantial portions of the Software. 1492d5d2a0SHawking Zhang * 1592d5d2a0SHawking Zhang * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1692d5d2a0SHawking Zhang * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1792d5d2a0SHawking Zhang * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1892d5d2a0SHawking Zhang * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 1992d5d2a0SHawking Zhang * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 2092d5d2a0SHawking Zhang * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 2192d5d2a0SHawking Zhang * OTHER DEALINGS IN THE SOFTWARE. 2292d5d2a0SHawking Zhang * 2392d5d2a0SHawking Zhang */ 2492d5d2a0SHawking Zhang 2592d5d2a0SHawking Zhang #ifndef __AMDGPU_CPER_H__ 2692d5d2a0SHawking Zhang #define __AMDGPU_CPER_H__ 2792d5d2a0SHawking Zhang 2892d5d2a0SHawking Zhang #include "amd_cper.h" 29ad97840fSHawking Zhang #include "amdgpu_aca.h" 3092d5d2a0SHawking Zhang 3192d5d2a0SHawking Zhang #define CPER_MAX_ALLOWED_COUNT 0x1000 324d614ce8STao Zhou #define CPER_MAX_RING_SIZE 0X100000 3392d5d2a0SHawking Zhang #define HDR_LEN (sizeof(struct cper_hdr)) 3492d5d2a0SHawking Zhang #define SEC_DESC_LEN (sizeof(struct cper_sec_desc)) 3592d5d2a0SHawking Zhang 3692d5d2a0SHawking Zhang #define BOOT_SEC_LEN (sizeof(struct cper_sec_crashdump_boot)) 3792d5d2a0SHawking Zhang #define FATAL_SEC_LEN (sizeof(struct cper_sec_crashdump_fatal)) 3892d5d2a0SHawking Zhang #define NONSTD_SEC_LEN (sizeof(struct cper_sec_nonstd_err)) 3992d5d2a0SHawking Zhang 4092d5d2a0SHawking Zhang #define SEC_DESC_OFFSET(idx) (HDR_LEN + (SEC_DESC_LEN * idx)) 4192d5d2a0SHawking Zhang 4292d5d2a0SHawking Zhang #define BOOT_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (BOOT_SEC_LEN * idx)) 4392d5d2a0SHawking Zhang #define FATAL_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (FATAL_SEC_LEN * idx)) 4492d5d2a0SHawking Zhang #define NONSTD_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (NONSTD_SEC_LEN * idx)) 4592d5d2a0SHawking Zhang 4692d5d2a0SHawking Zhang enum amdgpu_cper_type { 4792d5d2a0SHawking Zhang AMDGPU_CPER_TYPE_RUNTIME, 4892d5d2a0SHawking Zhang AMDGPU_CPER_TYPE_FATAL, 4992d5d2a0SHawking Zhang AMDGPU_CPER_TYPE_BOOT, 5092d5d2a0SHawking Zhang AMDGPU_CPER_TYPE_BP_THRESHOLD, 5192d5d2a0SHawking Zhang }; 5292d5d2a0SHawking Zhang 5392d5d2a0SHawking Zhang struct amdgpu_cper { 5492d5d2a0SHawking Zhang bool enabled; 5592d5d2a0SHawking Zhang 5692d5d2a0SHawking Zhang atomic_t unique_id; 5792d5d2a0SHawking Zhang struct mutex cper_lock; 5892d5d2a0SHawking Zhang 5992d5d2a0SHawking Zhang /* Lifetime CPERs generated */ 6092d5d2a0SHawking Zhang uint32_t count; 6192d5d2a0SHawking Zhang uint32_t max_count; 6292d5d2a0SHawking Zhang 6392d5d2a0SHawking Zhang uint32_t wptr; 6492d5d2a0SHawking Zhang 6592d5d2a0SHawking Zhang void *ring[CPER_MAX_ALLOWED_COUNT]; 664d614ce8STao Zhou struct amdgpu_ring ring_buf; 678652920dSTao Zhou struct mutex ring_lock; 6892d5d2a0SHawking Zhang }; 6992d5d2a0SHawking Zhang 7092d5d2a0SHawking Zhang void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev, 7192d5d2a0SHawking Zhang struct cper_hdr *hdr, 7292d5d2a0SHawking Zhang enum amdgpu_cper_type type, 7392d5d2a0SHawking Zhang enum cper_error_severity sev); 7492d5d2a0SHawking Zhang int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev, 7592d5d2a0SHawking Zhang struct cper_hdr *hdr, 7692d5d2a0SHawking Zhang uint32_t idx, 7792d5d2a0SHawking Zhang struct cper_sec_crashdump_reg_data reg_data); 7892d5d2a0SHawking Zhang int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev, 7992d5d2a0SHawking Zhang struct cper_hdr *hdr, 8092d5d2a0SHawking Zhang uint32_t idx, 8192d5d2a0SHawking Zhang enum cper_error_severity sev, 8292d5d2a0SHawking Zhang uint32_t *reg_dump, 8392d5d2a0SHawking Zhang uint32_t reg_count); 8492d5d2a0SHawking Zhang int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev, 8592d5d2a0SHawking Zhang struct cper_hdr *hdr, 8692d5d2a0SHawking Zhang uint32_t section_idx); 8792d5d2a0SHawking Zhang 8892d5d2a0SHawking Zhang struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev, 8992d5d2a0SHawking Zhang enum amdgpu_cper_type type, 9092d5d2a0SHawking Zhang uint16_t section_count); 91ad97840fSHawking Zhang /* UE must be encoded into separated cper entries, 1 UE 1 cper */ 92ad97840fSHawking Zhang int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev, 93ad97840fSHawking Zhang struct aca_bank *bank); 94ad97840fSHawking Zhang /* CEs and DEs are combined into 1 cper entry */ 95ad97840fSHawking Zhang int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev, 96ad97840fSHawking Zhang struct aca_banks *banks, 97ad97840fSHawking Zhang uint16_t bank_count); 98*f9d35b94SXiang Liu /* Bad page threshold is encoded into separated cper entry */ 99*f9d35b94SXiang Liu int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev); 100a6d9d192STao Zhou void amdgpu_cper_ring_write(struct amdgpu_ring *ring, 101a6d9d192STao Zhou void *src, int count); 10292d5d2a0SHawking Zhang int amdgpu_cper_init(struct amdgpu_device *adev); 10392d5d2a0SHawking Zhang int amdgpu_cper_fini(struct amdgpu_device *adev); 10492d5d2a0SHawking Zhang 10592d5d2a0SHawking Zhang #endif 106