1*c11a50b1SJeff Hugo // SPDX-License-Identifier: GPL-2.0-only 2*c11a50b1SJeff Hugo 3*c11a50b1SJeff Hugo /* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */ 4*c11a50b1SJeff Hugo /* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ 5*c11a50b1SJeff Hugo /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ 6*c11a50b1SJeff Hugo 7*c11a50b1SJeff Hugo #include <asm/byteorder.h> 8*c11a50b1SJeff Hugo #include <linux/device.h> 9*c11a50b1SJeff Hugo #include <linux/kernel.h> 10*c11a50b1SJeff Hugo #include <linux/mhi.h> 11*c11a50b1SJeff Hugo 12*c11a50b1SJeff Hugo #include "qaic.h" 13*c11a50b1SJeff Hugo #include "qaic_ras.h" 14*c11a50b1SJeff Hugo 15*c11a50b1SJeff Hugo #define MAGIC 0x55AA 16*c11a50b1SJeff Hugo #define VERSION 0x2 17*c11a50b1SJeff Hugo #define HDR_SZ 12 18*c11a50b1SJeff Hugo #define NUM_TEMP_LVL 3 19*c11a50b1SJeff Hugo #define POWER_BREAK BIT(0) 20*c11a50b1SJeff Hugo 21*c11a50b1SJeff Hugo enum msg_type { 22*c11a50b1SJeff Hugo MSG_PUSH, /* async push from device */ 23*c11a50b1SJeff Hugo MSG_REQ, /* sync request to device */ 24*c11a50b1SJeff Hugo MSG_RESP, /* sync response from device */ 25*c11a50b1SJeff Hugo }; 26*c11a50b1SJeff Hugo 27*c11a50b1SJeff Hugo enum err_type { 28*c11a50b1SJeff Hugo CE, /* correctable error */ 29*c11a50b1SJeff Hugo UE, /* uncorrectable error */ 30*c11a50b1SJeff Hugo UE_NF, /* uncorrectable error that is non-fatal, expect a disruption */ 31*c11a50b1SJeff Hugo ERR_TYPE_MAX, 32*c11a50b1SJeff Hugo }; 33*c11a50b1SJeff Hugo 34*c11a50b1SJeff Hugo static const char * const err_type_str[] = { 35*c11a50b1SJeff Hugo [CE] = "Correctable", 36*c11a50b1SJeff Hugo [UE] = "Uncorrectable", 37*c11a50b1SJeff Hugo [UE_NF] = "Uncorrectable Non-Fatal", 38*c11a50b1SJeff Hugo }; 39*c11a50b1SJeff Hugo 40*c11a50b1SJeff Hugo static const char * const err_class_str[] = { 41*c11a50b1SJeff Hugo [CE] = "Warning", 42*c11a50b1SJeff Hugo [UE] = "Fatal", 43*c11a50b1SJeff Hugo [UE_NF] = "Warning", 44*c11a50b1SJeff Hugo }; 45*c11a50b1SJeff Hugo 46*c11a50b1SJeff Hugo enum err_source { 47*c11a50b1SJeff Hugo SOC_MEM, 48*c11a50b1SJeff Hugo PCIE, 49*c11a50b1SJeff Hugo DDR, 50*c11a50b1SJeff Hugo SYS_BUS1, 51*c11a50b1SJeff Hugo SYS_BUS2, 52*c11a50b1SJeff Hugo NSP_MEM, 53*c11a50b1SJeff Hugo TSENS, 54*c11a50b1SJeff Hugo }; 55*c11a50b1SJeff Hugo 56*c11a50b1SJeff Hugo static const char * const err_src_str[TSENS + 1] = { 57*c11a50b1SJeff Hugo [SOC_MEM] = "SoC Memory", 58*c11a50b1SJeff Hugo [PCIE] = "PCIE", 59*c11a50b1SJeff Hugo [DDR] = "DDR", 60*c11a50b1SJeff Hugo [SYS_BUS1] = "System Bus source 1", 61*c11a50b1SJeff Hugo [SYS_BUS2] = "System Bus source 2", 62*c11a50b1SJeff Hugo [NSP_MEM] = "NSP Memory", 63*c11a50b1SJeff Hugo [TSENS] = "Temperature Sensors", 64*c11a50b1SJeff Hugo }; 65*c11a50b1SJeff Hugo 66*c11a50b1SJeff Hugo struct ras_data { 67*c11a50b1SJeff Hugo /* header start */ 68*c11a50b1SJeff Hugo /* Magic number to validate the message */ 69*c11a50b1SJeff Hugo u16 magic; 70*c11a50b1SJeff Hugo /* RAS version number */ 71*c11a50b1SJeff Hugo u16 ver; 72*c11a50b1SJeff Hugo u32 seq_num; 73*c11a50b1SJeff Hugo /* RAS message type */ 74*c11a50b1SJeff Hugo u8 type; 75*c11a50b1SJeff Hugo u8 id; 76*c11a50b1SJeff Hugo /* Size of RAS message without the header in byte */ 77*c11a50b1SJeff Hugo u16 len; 78*c11a50b1SJeff Hugo /* header end */ 79*c11a50b1SJeff Hugo s32 result; 80*c11a50b1SJeff Hugo /* 81*c11a50b1SJeff Hugo * Error source 82*c11a50b1SJeff Hugo * 0 : SoC Memory 83*c11a50b1SJeff Hugo * 1 : PCIE 84*c11a50b1SJeff Hugo * 2 : DDR 85*c11a50b1SJeff Hugo * 3 : System Bus source 1 86*c11a50b1SJeff Hugo * 4 : System Bus source 2 87*c11a50b1SJeff Hugo * 5 : NSP Memory 88*c11a50b1SJeff Hugo * 6 : Temperature Sensors 89*c11a50b1SJeff Hugo */ 90*c11a50b1SJeff Hugo u32 source; 91*c11a50b1SJeff Hugo /* 92*c11a50b1SJeff Hugo * Stores the error type, there are three types of error in RAS 93*c11a50b1SJeff Hugo * 0 : correctable error (CE) 94*c11a50b1SJeff Hugo * 1 : uncorrectable error (UE) 95*c11a50b1SJeff Hugo * 2 : uncorrectable error that is non-fatal (UE_NF) 96*c11a50b1SJeff Hugo */ 97*c11a50b1SJeff Hugo u32 err_type; 98*c11a50b1SJeff Hugo u32 err_threshold; 99*c11a50b1SJeff Hugo u32 ce_count; 100*c11a50b1SJeff Hugo u32 ue_count; 101*c11a50b1SJeff Hugo u32 intr_num; 102*c11a50b1SJeff Hugo /* Data specific to error source */ 103*c11a50b1SJeff Hugo u8 syndrome[64]; 104*c11a50b1SJeff Hugo } __packed; 105*c11a50b1SJeff Hugo 106*c11a50b1SJeff Hugo struct soc_mem_syndrome { 107*c11a50b1SJeff Hugo u64 error_address[8]; 108*c11a50b1SJeff Hugo } __packed; 109*c11a50b1SJeff Hugo 110*c11a50b1SJeff Hugo struct nsp_mem_syndrome { 111*c11a50b1SJeff Hugo u32 error_address[8]; 112*c11a50b1SJeff Hugo u8 nsp_id; 113*c11a50b1SJeff Hugo } __packed; 114*c11a50b1SJeff Hugo 115*c11a50b1SJeff Hugo struct ddr_syndrome { 116*c11a50b1SJeff Hugo u32 count; 117*c11a50b1SJeff Hugo u32 irq_status; 118*c11a50b1SJeff Hugo u32 data_31_0[2]; 119*c11a50b1SJeff Hugo u32 data_63_32[2]; 120*c11a50b1SJeff Hugo u32 data_95_64[2]; 121*c11a50b1SJeff Hugo u32 data_127_96[2]; 122*c11a50b1SJeff Hugo u32 addr_lsb; 123*c11a50b1SJeff Hugo u16 addr_msb; 124*c11a50b1SJeff Hugo u16 parity_bits; 125*c11a50b1SJeff Hugo u16 instance; 126*c11a50b1SJeff Hugo u16 err_type; 127*c11a50b1SJeff Hugo } __packed; 128*c11a50b1SJeff Hugo 129*c11a50b1SJeff Hugo struct tsens_syndrome { 130*c11a50b1SJeff Hugo u32 threshold_type; 131*c11a50b1SJeff Hugo s32 temp; 132*c11a50b1SJeff Hugo } __packed; 133*c11a50b1SJeff Hugo 134*c11a50b1SJeff Hugo struct sysbus1_syndrome { 135*c11a50b1SJeff Hugo u32 slave; 136*c11a50b1SJeff Hugo u32 err_type; 137*c11a50b1SJeff Hugo u16 addr[8]; 138*c11a50b1SJeff Hugo u8 instance; 139*c11a50b1SJeff Hugo } __packed; 140*c11a50b1SJeff Hugo 141*c11a50b1SJeff Hugo struct sysbus2_syndrome { 142*c11a50b1SJeff Hugo u32 lsb3; 143*c11a50b1SJeff Hugo u32 msb3; 144*c11a50b1SJeff Hugo u32 lsb2; 145*c11a50b1SJeff Hugo u32 msb2; 146*c11a50b1SJeff Hugo u32 ext_id; 147*c11a50b1SJeff Hugo u16 path; 148*c11a50b1SJeff Hugo u16 op_type; 149*c11a50b1SJeff Hugo u16 len; 150*c11a50b1SJeff Hugo u16 redirect; 151*c11a50b1SJeff Hugo u8 valid; 152*c11a50b1SJeff Hugo u8 word_error; 153*c11a50b1SJeff Hugo u8 non_secure; 154*c11a50b1SJeff Hugo u8 opc; 155*c11a50b1SJeff Hugo u8 error_code; 156*c11a50b1SJeff Hugo u8 trans_type; 157*c11a50b1SJeff Hugo u8 addr_space; 158*c11a50b1SJeff Hugo u8 instance; 159*c11a50b1SJeff Hugo } __packed; 160*c11a50b1SJeff Hugo 161*c11a50b1SJeff Hugo struct pcie_syndrome { 162*c11a50b1SJeff Hugo /* CE info */ 163*c11a50b1SJeff Hugo u32 bad_tlp; 164*c11a50b1SJeff Hugo u32 bad_dllp; 165*c11a50b1SJeff Hugo u32 replay_rollover; 166*c11a50b1SJeff Hugo u32 replay_timeout; 167*c11a50b1SJeff Hugo u32 rx_err; 168*c11a50b1SJeff Hugo u32 internal_ce_count; 169*c11a50b1SJeff Hugo /* UE_NF info */ 170*c11a50b1SJeff Hugo u32 fc_timeout; 171*c11a50b1SJeff Hugo u32 poison_tlp; 172*c11a50b1SJeff Hugo u32 ecrc_err; 173*c11a50b1SJeff Hugo u32 unsupported_req; 174*c11a50b1SJeff Hugo u32 completer_abort; 175*c11a50b1SJeff Hugo u32 completion_timeout; 176*c11a50b1SJeff Hugo /* UE info */ 177*c11a50b1SJeff Hugo u32 addr; 178*c11a50b1SJeff Hugo u8 index; 179*c11a50b1SJeff Hugo /* 180*c11a50b1SJeff Hugo * Flag to indicate specific event of PCIe 181*c11a50b1SJeff Hugo * BIT(0): Power break (low power) 182*c11a50b1SJeff Hugo * BIT(1) to BIT(7): Reserved 183*c11a50b1SJeff Hugo */ 184*c11a50b1SJeff Hugo u8 flag; 185*c11a50b1SJeff Hugo } __packed; 186*c11a50b1SJeff Hugo 187*c11a50b1SJeff Hugo static const char * const threshold_type_str[NUM_TEMP_LVL] = { 188*c11a50b1SJeff Hugo [0] = "lower", 189*c11a50b1SJeff Hugo [1] = "upper", 190*c11a50b1SJeff Hugo [2] = "critical", 191*c11a50b1SJeff Hugo }; 192*c11a50b1SJeff Hugo 193*c11a50b1SJeff Hugo static void ras_msg_to_cpu(struct ras_data *msg) 194*c11a50b1SJeff Hugo { 195*c11a50b1SJeff Hugo struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; 196*c11a50b1SJeff Hugo struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; 197*c11a50b1SJeff Hugo struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; 198*c11a50b1SJeff Hugo struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; 199*c11a50b1SJeff Hugo struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; 200*c11a50b1SJeff Hugo struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; 201*c11a50b1SJeff Hugo struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; 202*c11a50b1SJeff Hugo int i; 203*c11a50b1SJeff Hugo 204*c11a50b1SJeff Hugo le16_to_cpus(&msg->magic); 205*c11a50b1SJeff Hugo le16_to_cpus(&msg->ver); 206*c11a50b1SJeff Hugo le32_to_cpus(&msg->seq_num); 207*c11a50b1SJeff Hugo le16_to_cpus(&msg->len); 208*c11a50b1SJeff Hugo le32_to_cpus(&msg->result); 209*c11a50b1SJeff Hugo le32_to_cpus(&msg->source); 210*c11a50b1SJeff Hugo le32_to_cpus(&msg->err_type); 211*c11a50b1SJeff Hugo le32_to_cpus(&msg->err_threshold); 212*c11a50b1SJeff Hugo le32_to_cpus(&msg->ce_count); 213*c11a50b1SJeff Hugo le32_to_cpus(&msg->ue_count); 214*c11a50b1SJeff Hugo le32_to_cpus(&msg->intr_num); 215*c11a50b1SJeff Hugo 216*c11a50b1SJeff Hugo switch (msg->source) { 217*c11a50b1SJeff Hugo case SOC_MEM: 218*c11a50b1SJeff Hugo for (i = 0; i < 8; i++) 219*c11a50b1SJeff Hugo le64_to_cpus(&soc_syndrome->error_address[i]); 220*c11a50b1SJeff Hugo break; 221*c11a50b1SJeff Hugo case PCIE: 222*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->bad_tlp); 223*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->bad_dllp); 224*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->replay_rollover); 225*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->replay_timeout); 226*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->rx_err); 227*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->internal_ce_count); 228*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->fc_timeout); 229*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->poison_tlp); 230*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->ecrc_err); 231*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->unsupported_req); 232*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->completer_abort); 233*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->completion_timeout); 234*c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->addr); 235*c11a50b1SJeff Hugo break; 236*c11a50b1SJeff Hugo case DDR: 237*c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->instance); 238*c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->err_type); 239*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->count); 240*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->irq_status); 241*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_31_0[0]); 242*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_31_0[1]); 243*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_63_32[0]); 244*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_63_32[1]); 245*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_95_64[0]); 246*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_95_64[1]); 247*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_127_96[0]); 248*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_127_96[1]); 249*c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->parity_bits); 250*c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->addr_msb); 251*c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->addr_lsb); 252*c11a50b1SJeff Hugo break; 253*c11a50b1SJeff Hugo case SYS_BUS1: 254*c11a50b1SJeff Hugo le32_to_cpus(&sysbus1_syndrome->slave); 255*c11a50b1SJeff Hugo le32_to_cpus(&sysbus1_syndrome->err_type); 256*c11a50b1SJeff Hugo for (i = 0; i < 8; i++) 257*c11a50b1SJeff Hugo le16_to_cpus(&sysbus1_syndrome->addr[i]); 258*c11a50b1SJeff Hugo break; 259*c11a50b1SJeff Hugo case SYS_BUS2: 260*c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->op_type); 261*c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->len); 262*c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->redirect); 263*c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->path); 264*c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->ext_id); 265*c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->lsb2); 266*c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->msb2); 267*c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->lsb3); 268*c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->msb3); 269*c11a50b1SJeff Hugo break; 270*c11a50b1SJeff Hugo case NSP_MEM: 271*c11a50b1SJeff Hugo for (i = 0; i < 8; i++) 272*c11a50b1SJeff Hugo le32_to_cpus(&nsp_syndrome->error_address[i]); 273*c11a50b1SJeff Hugo break; 274*c11a50b1SJeff Hugo case TSENS: 275*c11a50b1SJeff Hugo le32_to_cpus(&tsens_syndrome->threshold_type); 276*c11a50b1SJeff Hugo le32_to_cpus(&tsens_syndrome->temp); 277*c11a50b1SJeff Hugo break; 278*c11a50b1SJeff Hugo } 279*c11a50b1SJeff Hugo } 280*c11a50b1SJeff Hugo 281*c11a50b1SJeff Hugo static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg) 282*c11a50b1SJeff Hugo { 283*c11a50b1SJeff Hugo struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; 284*c11a50b1SJeff Hugo struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; 285*c11a50b1SJeff Hugo struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; 286*c11a50b1SJeff Hugo struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; 287*c11a50b1SJeff Hugo struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; 288*c11a50b1SJeff Hugo struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; 289*c11a50b1SJeff Hugo struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; 290*c11a50b1SJeff Hugo char *class; 291*c11a50b1SJeff Hugo char *level; 292*c11a50b1SJeff Hugo 293*c11a50b1SJeff Hugo if (msg->magic != MAGIC) { 294*c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic); 295*c11a50b1SJeff Hugo return; 296*c11a50b1SJeff Hugo } 297*c11a50b1SJeff Hugo 298*c11a50b1SJeff Hugo if (!msg->ver || msg->ver > VERSION) { 299*c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver); 300*c11a50b1SJeff Hugo return; 301*c11a50b1SJeff Hugo } 302*c11a50b1SJeff Hugo 303*c11a50b1SJeff Hugo if (msg->type != MSG_PUSH) { 304*c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n"); 305*c11a50b1SJeff Hugo return; 306*c11a50b1SJeff Hugo } 307*c11a50b1SJeff Hugo 308*c11a50b1SJeff Hugo if (msg->len != sizeof(*msg) - HDR_SZ) { 309*c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len); 310*c11a50b1SJeff Hugo return; 311*c11a50b1SJeff Hugo } 312*c11a50b1SJeff Hugo 313*c11a50b1SJeff Hugo if (msg->err_type >= ERR_TYPE_MAX) { 314*c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type); 315*c11a50b1SJeff Hugo return; 316*c11a50b1SJeff Hugo } 317*c11a50b1SJeff Hugo 318*c11a50b1SJeff Hugo if (msg->err_type == UE) 319*c11a50b1SJeff Hugo level = KERN_ERR; 320*c11a50b1SJeff Hugo else 321*c11a50b1SJeff Hugo level = KERN_WARNING; 322*c11a50b1SJeff Hugo 323*c11a50b1SJeff Hugo switch (msg->source) { 324*c11a50b1SJeff Hugo case SOC_MEM: 325*c11a50b1SJeff Hugo pci_printk(level, qdev->pdev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n", 326*c11a50b1SJeff Hugo err_class_str[msg->err_type], 327*c11a50b1SJeff Hugo err_type_str[msg->err_type], 328*c11a50b1SJeff Hugo "error from", 329*c11a50b1SJeff Hugo err_src_str[msg->source], 330*c11a50b1SJeff Hugo msg->err_threshold, 331*c11a50b1SJeff Hugo soc_syndrome->error_address[0], 332*c11a50b1SJeff Hugo soc_syndrome->error_address[1], 333*c11a50b1SJeff Hugo soc_syndrome->error_address[2], 334*c11a50b1SJeff Hugo soc_syndrome->error_address[3], 335*c11a50b1SJeff Hugo soc_syndrome->error_address[4], 336*c11a50b1SJeff Hugo soc_syndrome->error_address[5], 337*c11a50b1SJeff Hugo soc_syndrome->error_address[6], 338*c11a50b1SJeff Hugo soc_syndrome->error_address[7]); 339*c11a50b1SJeff Hugo break; 340*c11a50b1SJeff Hugo case PCIE: 341*c11a50b1SJeff Hugo pci_printk(level, qdev->pdev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n", 342*c11a50b1SJeff Hugo err_class_str[msg->err_type], 343*c11a50b1SJeff Hugo err_type_str[msg->err_type], 344*c11a50b1SJeff Hugo "error from", 345*c11a50b1SJeff Hugo err_src_str[msg->source], 346*c11a50b1SJeff Hugo msg->err_threshold); 347*c11a50b1SJeff Hugo 348*c11a50b1SJeff Hugo switch (msg->err_type) { 349*c11a50b1SJeff Hugo case CE: 350*c11a50b1SJeff Hugo /* 351*c11a50b1SJeff Hugo * Modeled after AER prints. This continues the pci_printk() from a few 352*c11a50b1SJeff Hugo * lines up. We reduce duplication of code, but also avoid re-printing the 353*c11a50b1SJeff Hugo * PCI device info so that the end result looks uniform to the log user. 354*c11a50b1SJeff Hugo */ 355*c11a50b1SJeff Hugo printk(KERN_WARNING pr_fmt("Syndrome:\n Bad TLP count %d\n Bad DLLP count %d\n Replay Rollover count %d\n Replay Timeout count %d\n Recv Error count %d\n Internal CE count %d\n"), 356*c11a50b1SJeff Hugo pcie_syndrome->bad_tlp, 357*c11a50b1SJeff Hugo pcie_syndrome->bad_dllp, 358*c11a50b1SJeff Hugo pcie_syndrome->replay_rollover, 359*c11a50b1SJeff Hugo pcie_syndrome->replay_timeout, 360*c11a50b1SJeff Hugo pcie_syndrome->rx_err, 361*c11a50b1SJeff Hugo pcie_syndrome->internal_ce_count); 362*c11a50b1SJeff Hugo if (msg->ver > 0x1) 363*c11a50b1SJeff Hugo pr_warn(" Power break %s\n", 364*c11a50b1SJeff Hugo pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF"); 365*c11a50b1SJeff Hugo break; 366*c11a50b1SJeff Hugo case UE: 367*c11a50b1SJeff Hugo printk(KERN_ERR pr_fmt("Syndrome:\n Index %d\n Address 0x%x\n"), 368*c11a50b1SJeff Hugo pcie_syndrome->index, pcie_syndrome->addr); 369*c11a50b1SJeff Hugo break; 370*c11a50b1SJeff Hugo case UE_NF: 371*c11a50b1SJeff Hugo printk(KERN_WARNING pr_fmt("Syndrome:\n FC timeout count %d\n Poisoned TLP count %d\n ECRC error count %d\n Unsupported request count %d\n Completer abort count %d\n Completion timeout count %d\n"), 372*c11a50b1SJeff Hugo pcie_syndrome->fc_timeout, 373*c11a50b1SJeff Hugo pcie_syndrome->poison_tlp, 374*c11a50b1SJeff Hugo pcie_syndrome->ecrc_err, 375*c11a50b1SJeff Hugo pcie_syndrome->unsupported_req, 376*c11a50b1SJeff Hugo pcie_syndrome->completer_abort, 377*c11a50b1SJeff Hugo pcie_syndrome->completion_timeout); 378*c11a50b1SJeff Hugo break; 379*c11a50b1SJeff Hugo default: 380*c11a50b1SJeff Hugo break; 381*c11a50b1SJeff Hugo } 382*c11a50b1SJeff Hugo break; 383*c11a50b1SJeff Hugo case DDR: 384*c11a50b1SJeff Hugo pci_printk(level, qdev->pdev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n Instance %d\n Count %d\n Data 31_0 0x%x 0x%x\n Data 63_32 0x%x 0x%x\n Data 95_64 0x%x 0x%x\n Data 127_96 0x%x 0x%x\n Parity bits 0x%x\n Address msb 0x%x\n Address lsb 0x%x\n", 385*c11a50b1SJeff Hugo err_class_str[msg->err_type], 386*c11a50b1SJeff Hugo err_type_str[msg->err_type], 387*c11a50b1SJeff Hugo "error from", 388*c11a50b1SJeff Hugo err_src_str[msg->source], 389*c11a50b1SJeff Hugo msg->err_threshold, 390*c11a50b1SJeff Hugo ddr_syndrome->instance, 391*c11a50b1SJeff Hugo ddr_syndrome->count, 392*c11a50b1SJeff Hugo ddr_syndrome->data_31_0[1], 393*c11a50b1SJeff Hugo ddr_syndrome->data_31_0[0], 394*c11a50b1SJeff Hugo ddr_syndrome->data_63_32[1], 395*c11a50b1SJeff Hugo ddr_syndrome->data_63_32[0], 396*c11a50b1SJeff Hugo ddr_syndrome->data_95_64[1], 397*c11a50b1SJeff Hugo ddr_syndrome->data_95_64[0], 398*c11a50b1SJeff Hugo ddr_syndrome->data_127_96[1], 399*c11a50b1SJeff Hugo ddr_syndrome->data_127_96[0], 400*c11a50b1SJeff Hugo ddr_syndrome->parity_bits, 401*c11a50b1SJeff Hugo ddr_syndrome->addr_msb, 402*c11a50b1SJeff Hugo ddr_syndrome->addr_lsb); 403*c11a50b1SJeff Hugo break; 404*c11a50b1SJeff Hugo case SYS_BUS1: 405*c11a50b1SJeff Hugo pci_printk(level, qdev->pdev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n %s\n err_type %d\n address0 0x%x\n address1 0x%x\n address2 0x%x\n address3 0x%x\n address4 0x%x\n address5 0x%x\n address6 0x%x\n address7 0x%x\n", 406*c11a50b1SJeff Hugo err_class_str[msg->err_type], 407*c11a50b1SJeff Hugo err_type_str[msg->err_type], 408*c11a50b1SJeff Hugo "error from", 409*c11a50b1SJeff Hugo err_src_str[msg->source], 410*c11a50b1SJeff Hugo msg->err_threshold, 411*c11a50b1SJeff Hugo sysbus1_syndrome->instance, 412*c11a50b1SJeff Hugo sysbus1_syndrome->slave ? "Slave" : "Master", 413*c11a50b1SJeff Hugo sysbus1_syndrome->err_type, 414*c11a50b1SJeff Hugo sysbus1_syndrome->addr[0], 415*c11a50b1SJeff Hugo sysbus1_syndrome->addr[1], 416*c11a50b1SJeff Hugo sysbus1_syndrome->addr[2], 417*c11a50b1SJeff Hugo sysbus1_syndrome->addr[3], 418*c11a50b1SJeff Hugo sysbus1_syndrome->addr[4], 419*c11a50b1SJeff Hugo sysbus1_syndrome->addr[5], 420*c11a50b1SJeff Hugo sysbus1_syndrome->addr[6], 421*c11a50b1SJeff Hugo sysbus1_syndrome->addr[7]); 422*c11a50b1SJeff Hugo break; 423*c11a50b1SJeff Hugo case SYS_BUS2: 424*c11a50b1SJeff Hugo pci_printk(level, qdev->pdev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n valid %d\n word error %d\n non-secure %d\n opc %d\n error code %d\n transaction type %d\n address space %d\n operation type %d\n len %d\n redirect %d\n path %d\n ext_id %d\n lsb2 %d\n msb2 %d\n lsb3 %d\n msb3 %d\n", 425*c11a50b1SJeff Hugo err_class_str[msg->err_type], 426*c11a50b1SJeff Hugo err_type_str[msg->err_type], 427*c11a50b1SJeff Hugo "error from", 428*c11a50b1SJeff Hugo err_src_str[msg->source], 429*c11a50b1SJeff Hugo msg->err_threshold, 430*c11a50b1SJeff Hugo sysbus2_syndrome->instance, 431*c11a50b1SJeff Hugo sysbus2_syndrome->valid, 432*c11a50b1SJeff Hugo sysbus2_syndrome->word_error, 433*c11a50b1SJeff Hugo sysbus2_syndrome->non_secure, 434*c11a50b1SJeff Hugo sysbus2_syndrome->opc, 435*c11a50b1SJeff Hugo sysbus2_syndrome->error_code, 436*c11a50b1SJeff Hugo sysbus2_syndrome->trans_type, 437*c11a50b1SJeff Hugo sysbus2_syndrome->addr_space, 438*c11a50b1SJeff Hugo sysbus2_syndrome->op_type, 439*c11a50b1SJeff Hugo sysbus2_syndrome->len, 440*c11a50b1SJeff Hugo sysbus2_syndrome->redirect, 441*c11a50b1SJeff Hugo sysbus2_syndrome->path, 442*c11a50b1SJeff Hugo sysbus2_syndrome->ext_id, 443*c11a50b1SJeff Hugo sysbus2_syndrome->lsb2, 444*c11a50b1SJeff Hugo sysbus2_syndrome->msb2, 445*c11a50b1SJeff Hugo sysbus2_syndrome->lsb3, 446*c11a50b1SJeff Hugo sysbus2_syndrome->msb3); 447*c11a50b1SJeff Hugo break; 448*c11a50b1SJeff Hugo case NSP_MEM: 449*c11a50b1SJeff Hugo pci_printk(level, qdev->pdev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n NSP ID %d\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n", 450*c11a50b1SJeff Hugo err_class_str[msg->err_type], 451*c11a50b1SJeff Hugo err_type_str[msg->err_type], 452*c11a50b1SJeff Hugo "error from", 453*c11a50b1SJeff Hugo err_src_str[msg->source], 454*c11a50b1SJeff Hugo msg->err_threshold, 455*c11a50b1SJeff Hugo nsp_syndrome->nsp_id, 456*c11a50b1SJeff Hugo nsp_syndrome->error_address[0], 457*c11a50b1SJeff Hugo nsp_syndrome->error_address[1], 458*c11a50b1SJeff Hugo nsp_syndrome->error_address[2], 459*c11a50b1SJeff Hugo nsp_syndrome->error_address[3], 460*c11a50b1SJeff Hugo nsp_syndrome->error_address[4], 461*c11a50b1SJeff Hugo nsp_syndrome->error_address[5], 462*c11a50b1SJeff Hugo nsp_syndrome->error_address[6], 463*c11a50b1SJeff Hugo nsp_syndrome->error_address[7]); 464*c11a50b1SJeff Hugo break; 465*c11a50b1SJeff Hugo case TSENS: 466*c11a50b1SJeff Hugo if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) { 467*c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n", 468*c11a50b1SJeff Hugo tsens_syndrome->threshold_type); 469*c11a50b1SJeff Hugo break; 470*c11a50b1SJeff Hugo } 471*c11a50b1SJeff Hugo 472*c11a50b1SJeff Hugo if (msg->err_type) 473*c11a50b1SJeff Hugo class = "Fatal"; 474*c11a50b1SJeff Hugo else if (tsens_syndrome->threshold_type) 475*c11a50b1SJeff Hugo class = "Critical"; 476*c11a50b1SJeff Hugo else 477*c11a50b1SJeff Hugo class = "Warning"; 478*c11a50b1SJeff Hugo 479*c11a50b1SJeff Hugo pci_printk(level, qdev->pdev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n %s threshold\n %d deg C\n", 480*c11a50b1SJeff Hugo class, 481*c11a50b1SJeff Hugo err_type_str[msg->err_type], 482*c11a50b1SJeff Hugo "error from", 483*c11a50b1SJeff Hugo err_src_str[msg->source], 484*c11a50b1SJeff Hugo msg->err_threshold, 485*c11a50b1SJeff Hugo threshold_type_str[tsens_syndrome->threshold_type], 486*c11a50b1SJeff Hugo tsens_syndrome->temp); 487*c11a50b1SJeff Hugo break; 488*c11a50b1SJeff Hugo } 489*c11a50b1SJeff Hugo 490*c11a50b1SJeff Hugo /* Uncorrectable errors are fatal */ 491*c11a50b1SJeff Hugo if (msg->err_type == UE) 492*c11a50b1SJeff Hugo mhi_soc_reset(qdev->mhi_cntrl); 493*c11a50b1SJeff Hugo 494*c11a50b1SJeff Hugo switch (msg->err_type) { 495*c11a50b1SJeff Hugo case CE: 496*c11a50b1SJeff Hugo if (qdev->ce_count != UINT_MAX) 497*c11a50b1SJeff Hugo qdev->ce_count++; 498*c11a50b1SJeff Hugo break; 499*c11a50b1SJeff Hugo case UE: 500*c11a50b1SJeff Hugo if (qdev->ce_count != UINT_MAX) 501*c11a50b1SJeff Hugo qdev->ue_count++; 502*c11a50b1SJeff Hugo break; 503*c11a50b1SJeff Hugo case UE_NF: 504*c11a50b1SJeff Hugo if (qdev->ce_count != UINT_MAX) 505*c11a50b1SJeff Hugo qdev->ue_nf_count++; 506*c11a50b1SJeff Hugo break; 507*c11a50b1SJeff Hugo default: 508*c11a50b1SJeff Hugo /* not possible */ 509*c11a50b1SJeff Hugo break; 510*c11a50b1SJeff Hugo } 511*c11a50b1SJeff Hugo } 512*c11a50b1SJeff Hugo 513*c11a50b1SJeff Hugo static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf) 514*c11a50b1SJeff Hugo { 515*c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); 516*c11a50b1SJeff Hugo 517*c11a50b1SJeff Hugo return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count); 518*c11a50b1SJeff Hugo } 519*c11a50b1SJeff Hugo 520*c11a50b1SJeff Hugo static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf) 521*c11a50b1SJeff Hugo { 522*c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); 523*c11a50b1SJeff Hugo 524*c11a50b1SJeff Hugo return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count); 525*c11a50b1SJeff Hugo } 526*c11a50b1SJeff Hugo 527*c11a50b1SJeff Hugo static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf) 528*c11a50b1SJeff Hugo { 529*c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); 530*c11a50b1SJeff Hugo 531*c11a50b1SJeff Hugo return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count); 532*c11a50b1SJeff Hugo } 533*c11a50b1SJeff Hugo 534*c11a50b1SJeff Hugo static DEVICE_ATTR_RO(ce_count); 535*c11a50b1SJeff Hugo static DEVICE_ATTR_RO(ue_count); 536*c11a50b1SJeff Hugo static DEVICE_ATTR_RO(ue_nonfatal_count); 537*c11a50b1SJeff Hugo 538*c11a50b1SJeff Hugo static struct attribute *ras_attrs[] = { 539*c11a50b1SJeff Hugo &dev_attr_ce_count.attr, 540*c11a50b1SJeff Hugo &dev_attr_ue_count.attr, 541*c11a50b1SJeff Hugo &dev_attr_ue_nonfatal_count.attr, 542*c11a50b1SJeff Hugo NULL, 543*c11a50b1SJeff Hugo }; 544*c11a50b1SJeff Hugo 545*c11a50b1SJeff Hugo static struct attribute_group ras_group = { 546*c11a50b1SJeff Hugo .attrs = ras_attrs, 547*c11a50b1SJeff Hugo }; 548*c11a50b1SJeff Hugo 549*c11a50b1SJeff Hugo static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) 550*c11a50b1SJeff Hugo { 551*c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); 552*c11a50b1SJeff Hugo struct ras_data *resp; 553*c11a50b1SJeff Hugo int ret; 554*c11a50b1SJeff Hugo 555*c11a50b1SJeff Hugo ret = mhi_prepare_for_transfer(mhi_dev); 556*c11a50b1SJeff Hugo if (ret) 557*c11a50b1SJeff Hugo return ret; 558*c11a50b1SJeff Hugo 559*c11a50b1SJeff Hugo resp = kzalloc(sizeof(*resp), GFP_KERNEL); 560*c11a50b1SJeff Hugo if (!resp) { 561*c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 562*c11a50b1SJeff Hugo return -ENOMEM; 563*c11a50b1SJeff Hugo } 564*c11a50b1SJeff Hugo 565*c11a50b1SJeff Hugo ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT); 566*c11a50b1SJeff Hugo if (ret) { 567*c11a50b1SJeff Hugo kfree(resp); 568*c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 569*c11a50b1SJeff Hugo return ret; 570*c11a50b1SJeff Hugo } 571*c11a50b1SJeff Hugo 572*c11a50b1SJeff Hugo ret = device_add_group(&qdev->pdev->dev, &ras_group); 573*c11a50b1SJeff Hugo if (ret) { 574*c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 575*c11a50b1SJeff Hugo pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret); 576*c11a50b1SJeff Hugo return ret; 577*c11a50b1SJeff Hugo } 578*c11a50b1SJeff Hugo 579*c11a50b1SJeff Hugo dev_set_drvdata(&mhi_dev->dev, qdev); 580*c11a50b1SJeff Hugo qdev->ras_ch = mhi_dev; 581*c11a50b1SJeff Hugo 582*c11a50b1SJeff Hugo return ret; 583*c11a50b1SJeff Hugo } 584*c11a50b1SJeff Hugo 585*c11a50b1SJeff Hugo static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev) 586*c11a50b1SJeff Hugo { 587*c11a50b1SJeff Hugo struct qaic_device *qdev; 588*c11a50b1SJeff Hugo 589*c11a50b1SJeff Hugo qdev = dev_get_drvdata(&mhi_dev->dev); 590*c11a50b1SJeff Hugo qdev->ras_ch = NULL; 591*c11a50b1SJeff Hugo device_remove_group(&qdev->pdev->dev, &ras_group); 592*c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 593*c11a50b1SJeff Hugo } 594*c11a50b1SJeff Hugo 595*c11a50b1SJeff Hugo static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) {} 596*c11a50b1SJeff Hugo 597*c11a50b1SJeff Hugo static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) 598*c11a50b1SJeff Hugo { 599*c11a50b1SJeff Hugo struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); 600*c11a50b1SJeff Hugo struct ras_data *msg = mhi_result->buf_addr; 601*c11a50b1SJeff Hugo int ret; 602*c11a50b1SJeff Hugo 603*c11a50b1SJeff Hugo if (mhi_result->transaction_status) { 604*c11a50b1SJeff Hugo kfree(msg); 605*c11a50b1SJeff Hugo return; 606*c11a50b1SJeff Hugo } 607*c11a50b1SJeff Hugo 608*c11a50b1SJeff Hugo ras_msg_to_cpu(msg); 609*c11a50b1SJeff Hugo decode_ras_msg(qdev, msg); 610*c11a50b1SJeff Hugo 611*c11a50b1SJeff Hugo ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT); 612*c11a50b1SJeff Hugo if (ret) { 613*c11a50b1SJeff Hugo dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret); 614*c11a50b1SJeff Hugo kfree(msg); 615*c11a50b1SJeff Hugo } 616*c11a50b1SJeff Hugo } 617*c11a50b1SJeff Hugo 618*c11a50b1SJeff Hugo static const struct mhi_device_id qaic_ras_mhi_match_table[] = { 619*c11a50b1SJeff Hugo { .chan = "QAIC_STATUS", }, 620*c11a50b1SJeff Hugo {}, 621*c11a50b1SJeff Hugo }; 622*c11a50b1SJeff Hugo 623*c11a50b1SJeff Hugo static struct mhi_driver qaic_ras_mhi_driver = { 624*c11a50b1SJeff Hugo .id_table = qaic_ras_mhi_match_table, 625*c11a50b1SJeff Hugo .remove = qaic_ras_mhi_remove, 626*c11a50b1SJeff Hugo .probe = qaic_ras_mhi_probe, 627*c11a50b1SJeff Hugo .ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb, 628*c11a50b1SJeff Hugo .dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb, 629*c11a50b1SJeff Hugo .driver = { 630*c11a50b1SJeff Hugo .name = "qaic_ras", 631*c11a50b1SJeff Hugo }, 632*c11a50b1SJeff Hugo }; 633*c11a50b1SJeff Hugo 634*c11a50b1SJeff Hugo int qaic_ras_register(void) 635*c11a50b1SJeff Hugo { 636*c11a50b1SJeff Hugo return mhi_driver_register(&qaic_ras_mhi_driver); 637*c11a50b1SJeff Hugo } 638*c11a50b1SJeff Hugo 639*c11a50b1SJeff Hugo void qaic_ras_unregister(void) 640*c11a50b1SJeff Hugo { 641*c11a50b1SJeff Hugo mhi_driver_unregister(&qaic_ras_mhi_driver); 642*c11a50b1SJeff Hugo } 643