1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2026 Intel Corporation 4 */ 5 6 #include "xe_device.h" 7 #include "xe_printk.h" 8 #include "xe_ras.h" 9 #include "xe_ras_types.h" 10 #include "xe_sysctrl.h" 11 #include "xe_sysctrl_event_types.h" 12 13 /* Severity of detected errors */ 14 enum xe_ras_severity { 15 XE_RAS_SEV_NOT_SUPPORTED = 0, 16 XE_RAS_SEV_CORRECTABLE, 17 XE_RAS_SEV_UNCORRECTABLE, 18 XE_RAS_SEV_INFORMATIONAL, 19 XE_RAS_SEV_MAX 20 }; 21 22 /* Major IP blocks/components where errors can originate */ 23 enum xe_ras_component { 24 XE_RAS_COMP_NOT_SUPPORTED = 0, 25 XE_RAS_COMP_DEVICE_MEMORY, 26 XE_RAS_COMP_CORE_COMPUTE, 27 XE_RAS_COMP_RESERVED, 28 XE_RAS_COMP_PCIE, 29 XE_RAS_COMP_FABRIC, 30 XE_RAS_COMP_SOC_INTERNAL, 31 XE_RAS_COMP_MAX 32 }; 33 34 static const char *const xe_ras_severities[] = { 35 [XE_RAS_SEV_NOT_SUPPORTED] = "Not Supported", 36 [XE_RAS_SEV_CORRECTABLE] = "Correctable Error", 37 [XE_RAS_SEV_UNCORRECTABLE] = "Uncorrectable Error", 38 [XE_RAS_SEV_INFORMATIONAL] = "Informational Error", 39 }; 40 static_assert(ARRAY_SIZE(xe_ras_severities) == XE_RAS_SEV_MAX); 41 42 static const char *const xe_ras_components[] = { 43 [XE_RAS_COMP_NOT_SUPPORTED] = "Not Supported", 44 [XE_RAS_COMP_DEVICE_MEMORY] = "Device Memory", 45 [XE_RAS_COMP_CORE_COMPUTE] = "Core Compute", 46 [XE_RAS_COMP_RESERVED] = "Reserved", 47 [XE_RAS_COMP_PCIE] = "PCIe", 48 [XE_RAS_COMP_FABRIC] = "Fabric", 49 [XE_RAS_COMP_SOC_INTERNAL] = "SoC Internal", 50 }; 51 static_assert(ARRAY_SIZE(xe_ras_components) == XE_RAS_COMP_MAX); 52 53 static inline const char *sev_to_str(u8 severity) 54 { 55 if (severity >= XE_RAS_SEV_MAX) 56 severity = XE_RAS_SEV_NOT_SUPPORTED; 57 58 return xe_ras_severities[severity]; 59 } 60 61 static inline const char *comp_to_str(u8 component) 62 { 63 if (component >= XE_RAS_COMP_MAX) 64 component = XE_RAS_COMP_NOT_SUPPORTED; 65 66 return xe_ras_components[component]; 67 } 68 69 void xe_ras_counter_threshold_crossed(struct xe_device *xe, 70 struct xe_sysctrl_event_response *response) 71 { 72 struct xe_ras_threshold_crossed *pending = (void *)&response->data; 73 struct xe_ras_error_class *errors = pending->counters; 74 u32 id, ncounters = pending->ncounters; 75 76 BUILD_BUG_ON(sizeof(response->data) < sizeof(*pending)); 77 xe_device_assert_mem_access(xe); 78 79 if (!ncounters || ncounters > XE_RAS_NUM_COUNTERS) 80 xe_err(xe, "sysctrl: unexpected counter threshold crossed %u\n", ncounters); 81 else 82 xe_warn(xe, "[RAS]: counter threshold crossed, %u new errors\n", ncounters); 83 84 for (id = 0; id < ncounters && id < XE_RAS_NUM_COUNTERS; id++) { 85 u8 severity, component; 86 87 severity = errors[id].common.severity; 88 component = errors[id].common.component; 89 90 xe_warn(xe, "[RAS]: %s %s detected\n", 91 comp_to_str(component), sev_to_str(severity)); 92 } 93 } 94