1c11a50b1SJeff Hugo // SPDX-License-Identifier: GPL-2.0-only 2c11a50b1SJeff Hugo 3c11a50b1SJeff Hugo /* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */ 4c11a50b1SJeff Hugo /* Copyright (c) 2022-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ 5c11a50b1SJeff Hugo /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */ 6c11a50b1SJeff Hugo 7c11a50b1SJeff Hugo #include <asm/byteorder.h> 8c11a50b1SJeff Hugo #include <linux/device.h> 9c11a50b1SJeff Hugo #include <linux/kernel.h> 10c11a50b1SJeff Hugo #include <linux/mhi.h> 11c11a50b1SJeff Hugo 12c11a50b1SJeff Hugo #include "qaic.h" 13c11a50b1SJeff Hugo #include "qaic_ras.h" 14c11a50b1SJeff Hugo 15c11a50b1SJeff Hugo #define MAGIC 0x55AA 16c11a50b1SJeff Hugo #define VERSION 0x2 17c11a50b1SJeff Hugo #define HDR_SZ 12 18c11a50b1SJeff Hugo #define NUM_TEMP_LVL 3 19c11a50b1SJeff Hugo #define POWER_BREAK BIT(0) 20c11a50b1SJeff Hugo 21c11a50b1SJeff Hugo enum msg_type { 22c11a50b1SJeff Hugo MSG_PUSH, /* async push from device */ 23c11a50b1SJeff Hugo MSG_REQ, /* sync request to device */ 24c11a50b1SJeff Hugo MSG_RESP, /* sync response from device */ 25c11a50b1SJeff Hugo }; 26c11a50b1SJeff Hugo 27c11a50b1SJeff Hugo enum err_type { 28c11a50b1SJeff Hugo CE, /* correctable error */ 29c11a50b1SJeff Hugo UE, /* uncorrectable error */ 30c11a50b1SJeff Hugo UE_NF, /* uncorrectable error that is non-fatal, expect a disruption */ 31c11a50b1SJeff Hugo ERR_TYPE_MAX, 32c11a50b1SJeff Hugo }; 33c11a50b1SJeff Hugo 34c11a50b1SJeff Hugo static const char * const err_type_str[] = { 35c11a50b1SJeff Hugo [CE] = "Correctable", 36c11a50b1SJeff Hugo [UE] = "Uncorrectable", 37c11a50b1SJeff Hugo [UE_NF] = "Uncorrectable Non-Fatal", 38c11a50b1SJeff Hugo }; 39c11a50b1SJeff Hugo 40c11a50b1SJeff Hugo static const char * const err_class_str[] = { 41c11a50b1SJeff Hugo [CE] = "Warning", 42c11a50b1SJeff Hugo [UE] = "Fatal", 43c11a50b1SJeff Hugo [UE_NF] = "Warning", 44c11a50b1SJeff Hugo }; 45c11a50b1SJeff Hugo 46c11a50b1SJeff Hugo enum err_source { 47c11a50b1SJeff Hugo SOC_MEM, 48c11a50b1SJeff Hugo PCIE, 49c11a50b1SJeff Hugo DDR, 50c11a50b1SJeff Hugo SYS_BUS1, 51c11a50b1SJeff Hugo SYS_BUS2, 52c11a50b1SJeff Hugo NSP_MEM, 53c11a50b1SJeff Hugo TSENS, 54c11a50b1SJeff Hugo }; 55c11a50b1SJeff Hugo 56c11a50b1SJeff Hugo static const char * const err_src_str[TSENS + 1] = { 57c11a50b1SJeff Hugo [SOC_MEM] = "SoC Memory", 58c11a50b1SJeff Hugo [PCIE] = "PCIE", 59c11a50b1SJeff Hugo [DDR] = "DDR", 60c11a50b1SJeff Hugo [SYS_BUS1] = "System Bus source 1", 61c11a50b1SJeff Hugo [SYS_BUS2] = "System Bus source 2", 62c11a50b1SJeff Hugo [NSP_MEM] = "NSP Memory", 63c11a50b1SJeff Hugo [TSENS] = "Temperature Sensors", 64c11a50b1SJeff Hugo }; 65c11a50b1SJeff Hugo 66c11a50b1SJeff Hugo struct ras_data { 67c11a50b1SJeff Hugo /* header start */ 68c11a50b1SJeff Hugo /* Magic number to validate the message */ 69c11a50b1SJeff Hugo u16 magic; 70c11a50b1SJeff Hugo /* RAS version number */ 71c11a50b1SJeff Hugo u16 ver; 72c11a50b1SJeff Hugo u32 seq_num; 73c11a50b1SJeff Hugo /* RAS message type */ 74c11a50b1SJeff Hugo u8 type; 75c11a50b1SJeff Hugo u8 id; 76c11a50b1SJeff Hugo /* Size of RAS message without the header in byte */ 77c11a50b1SJeff Hugo u16 len; 78c11a50b1SJeff Hugo /* header end */ 79c11a50b1SJeff Hugo s32 result; 80c11a50b1SJeff Hugo /* 81c11a50b1SJeff Hugo * Error source 82c11a50b1SJeff Hugo * 0 : SoC Memory 83c11a50b1SJeff Hugo * 1 : PCIE 84c11a50b1SJeff Hugo * 2 : DDR 85c11a50b1SJeff Hugo * 3 : System Bus source 1 86c11a50b1SJeff Hugo * 4 : System Bus source 2 87c11a50b1SJeff Hugo * 5 : NSP Memory 88c11a50b1SJeff Hugo * 6 : Temperature Sensors 89c11a50b1SJeff Hugo */ 90c11a50b1SJeff Hugo u32 source; 91c11a50b1SJeff Hugo /* 92c11a50b1SJeff Hugo * Stores the error type, there are three types of error in RAS 93c11a50b1SJeff Hugo * 0 : correctable error (CE) 94c11a50b1SJeff Hugo * 1 : uncorrectable error (UE) 95c11a50b1SJeff Hugo * 2 : uncorrectable error that is non-fatal (UE_NF) 96c11a50b1SJeff Hugo */ 97c11a50b1SJeff Hugo u32 err_type; 98c11a50b1SJeff Hugo u32 err_threshold; 99c11a50b1SJeff Hugo u32 ce_count; 100c11a50b1SJeff Hugo u32 ue_count; 101c11a50b1SJeff Hugo u32 intr_num; 102c11a50b1SJeff Hugo /* Data specific to error source */ 103c11a50b1SJeff Hugo u8 syndrome[64]; 104c11a50b1SJeff Hugo } __packed; 105c11a50b1SJeff Hugo 106c11a50b1SJeff Hugo struct soc_mem_syndrome { 107c11a50b1SJeff Hugo u64 error_address[8]; 108c11a50b1SJeff Hugo } __packed; 109c11a50b1SJeff Hugo 110c11a50b1SJeff Hugo struct nsp_mem_syndrome { 111c11a50b1SJeff Hugo u32 error_address[8]; 112c11a50b1SJeff Hugo u8 nsp_id; 113c11a50b1SJeff Hugo } __packed; 114c11a50b1SJeff Hugo 115c11a50b1SJeff Hugo struct ddr_syndrome { 116c11a50b1SJeff Hugo u32 count; 117c11a50b1SJeff Hugo u32 irq_status; 118c11a50b1SJeff Hugo u32 data_31_0[2]; 119c11a50b1SJeff Hugo u32 data_63_32[2]; 120c11a50b1SJeff Hugo u32 data_95_64[2]; 121c11a50b1SJeff Hugo u32 data_127_96[2]; 122c11a50b1SJeff Hugo u32 addr_lsb; 123c11a50b1SJeff Hugo u16 addr_msb; 124c11a50b1SJeff Hugo u16 parity_bits; 125c11a50b1SJeff Hugo u16 instance; 126c11a50b1SJeff Hugo u16 err_type; 127c11a50b1SJeff Hugo } __packed; 128c11a50b1SJeff Hugo 129c11a50b1SJeff Hugo struct tsens_syndrome { 130c11a50b1SJeff Hugo u32 threshold_type; 131c11a50b1SJeff Hugo s32 temp; 132c11a50b1SJeff Hugo } __packed; 133c11a50b1SJeff Hugo 134c11a50b1SJeff Hugo struct sysbus1_syndrome { 135c11a50b1SJeff Hugo u32 slave; 136c11a50b1SJeff Hugo u32 err_type; 137c11a50b1SJeff Hugo u16 addr[8]; 138c11a50b1SJeff Hugo u8 instance; 139c11a50b1SJeff Hugo } __packed; 140c11a50b1SJeff Hugo 141c11a50b1SJeff Hugo struct sysbus2_syndrome { 142c11a50b1SJeff Hugo u32 lsb3; 143c11a50b1SJeff Hugo u32 msb3; 144c11a50b1SJeff Hugo u32 lsb2; 145c11a50b1SJeff Hugo u32 msb2; 146c11a50b1SJeff Hugo u32 ext_id; 147c11a50b1SJeff Hugo u16 path; 148c11a50b1SJeff Hugo u16 op_type; 149c11a50b1SJeff Hugo u16 len; 150c11a50b1SJeff Hugo u16 redirect; 151c11a50b1SJeff Hugo u8 valid; 152c11a50b1SJeff Hugo u8 word_error; 153c11a50b1SJeff Hugo u8 non_secure; 154c11a50b1SJeff Hugo u8 opc; 155c11a50b1SJeff Hugo u8 error_code; 156c11a50b1SJeff Hugo u8 trans_type; 157c11a50b1SJeff Hugo u8 addr_space; 158c11a50b1SJeff Hugo u8 instance; 159c11a50b1SJeff Hugo } __packed; 160c11a50b1SJeff Hugo 161c11a50b1SJeff Hugo struct pcie_syndrome { 162c11a50b1SJeff Hugo /* CE info */ 163c11a50b1SJeff Hugo u32 bad_tlp; 164c11a50b1SJeff Hugo u32 bad_dllp; 165c11a50b1SJeff Hugo u32 replay_rollover; 166c11a50b1SJeff Hugo u32 replay_timeout; 167c11a50b1SJeff Hugo u32 rx_err; 168c11a50b1SJeff Hugo u32 internal_ce_count; 169c11a50b1SJeff Hugo /* UE_NF info */ 170c11a50b1SJeff Hugo u32 fc_timeout; 171c11a50b1SJeff Hugo u32 poison_tlp; 172c11a50b1SJeff Hugo u32 ecrc_err; 173c11a50b1SJeff Hugo u32 unsupported_req; 174c11a50b1SJeff Hugo u32 completer_abort; 175c11a50b1SJeff Hugo u32 completion_timeout; 176c11a50b1SJeff Hugo /* UE info */ 177c11a50b1SJeff Hugo u32 addr; 178c11a50b1SJeff Hugo u8 index; 179c11a50b1SJeff Hugo /* 180c11a50b1SJeff Hugo * Flag to indicate specific event of PCIe 181c11a50b1SJeff Hugo * BIT(0): Power break (low power) 182c11a50b1SJeff Hugo * BIT(1) to BIT(7): Reserved 183c11a50b1SJeff Hugo */ 184c11a50b1SJeff Hugo u8 flag; 185c11a50b1SJeff Hugo } __packed; 186c11a50b1SJeff Hugo 187c11a50b1SJeff Hugo static const char * const threshold_type_str[NUM_TEMP_LVL] = { 188c11a50b1SJeff Hugo [0] = "lower", 189c11a50b1SJeff Hugo [1] = "upper", 190c11a50b1SJeff Hugo [2] = "critical", 191c11a50b1SJeff Hugo }; 192c11a50b1SJeff Hugo 193c11a50b1SJeff Hugo static void ras_msg_to_cpu(struct ras_data *msg) 194c11a50b1SJeff Hugo { 195c11a50b1SJeff Hugo struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; 196c11a50b1SJeff Hugo struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; 197c11a50b1SJeff Hugo struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; 198c11a50b1SJeff Hugo struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; 199c11a50b1SJeff Hugo struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; 200c11a50b1SJeff Hugo struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; 201c11a50b1SJeff Hugo struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; 202c11a50b1SJeff Hugo int i; 203c11a50b1SJeff Hugo 204c11a50b1SJeff Hugo le16_to_cpus(&msg->magic); 205c11a50b1SJeff Hugo le16_to_cpus(&msg->ver); 206c11a50b1SJeff Hugo le32_to_cpus(&msg->seq_num); 207c11a50b1SJeff Hugo le16_to_cpus(&msg->len); 208c11a50b1SJeff Hugo le32_to_cpus(&msg->result); 209c11a50b1SJeff Hugo le32_to_cpus(&msg->source); 210c11a50b1SJeff Hugo le32_to_cpus(&msg->err_type); 211c11a50b1SJeff Hugo le32_to_cpus(&msg->err_threshold); 212c11a50b1SJeff Hugo le32_to_cpus(&msg->ce_count); 213c11a50b1SJeff Hugo le32_to_cpus(&msg->ue_count); 214c11a50b1SJeff Hugo le32_to_cpus(&msg->intr_num); 215c11a50b1SJeff Hugo 216c11a50b1SJeff Hugo switch (msg->source) { 217c11a50b1SJeff Hugo case SOC_MEM: 218c11a50b1SJeff Hugo for (i = 0; i < 8; i++) 219c11a50b1SJeff Hugo le64_to_cpus(&soc_syndrome->error_address[i]); 220c11a50b1SJeff Hugo break; 221c11a50b1SJeff Hugo case PCIE: 222c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->bad_tlp); 223c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->bad_dllp); 224c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->replay_rollover); 225c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->replay_timeout); 226c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->rx_err); 227c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->internal_ce_count); 228c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->fc_timeout); 229c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->poison_tlp); 230c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->ecrc_err); 231c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->unsupported_req); 232c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->completer_abort); 233c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->completion_timeout); 234c11a50b1SJeff Hugo le32_to_cpus(&pcie_syndrome->addr); 235c11a50b1SJeff Hugo break; 236c11a50b1SJeff Hugo case DDR: 237c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->instance); 238c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->err_type); 239c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->count); 240c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->irq_status); 241c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_31_0[0]); 242c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_31_0[1]); 243c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_63_32[0]); 244c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_63_32[1]); 245c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_95_64[0]); 246c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_95_64[1]); 247c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_127_96[0]); 248c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->data_127_96[1]); 249c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->parity_bits); 250c11a50b1SJeff Hugo le16_to_cpus(&ddr_syndrome->addr_msb); 251c11a50b1SJeff Hugo le32_to_cpus(&ddr_syndrome->addr_lsb); 252c11a50b1SJeff Hugo break; 253c11a50b1SJeff Hugo case SYS_BUS1: 254c11a50b1SJeff Hugo le32_to_cpus(&sysbus1_syndrome->slave); 255c11a50b1SJeff Hugo le32_to_cpus(&sysbus1_syndrome->err_type); 256c11a50b1SJeff Hugo for (i = 0; i < 8; i++) 257c11a50b1SJeff Hugo le16_to_cpus(&sysbus1_syndrome->addr[i]); 258c11a50b1SJeff Hugo break; 259c11a50b1SJeff Hugo case SYS_BUS2: 260c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->op_type); 261c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->len); 262c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->redirect); 263c11a50b1SJeff Hugo le16_to_cpus(&sysbus2_syndrome->path); 264c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->ext_id); 265c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->lsb2); 266c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->msb2); 267c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->lsb3); 268c11a50b1SJeff Hugo le32_to_cpus(&sysbus2_syndrome->msb3); 269c11a50b1SJeff Hugo break; 270c11a50b1SJeff Hugo case NSP_MEM: 271c11a50b1SJeff Hugo for (i = 0; i < 8; i++) 272c11a50b1SJeff Hugo le32_to_cpus(&nsp_syndrome->error_address[i]); 273c11a50b1SJeff Hugo break; 274c11a50b1SJeff Hugo case TSENS: 275c11a50b1SJeff Hugo le32_to_cpus(&tsens_syndrome->threshold_type); 276c11a50b1SJeff Hugo le32_to_cpus(&tsens_syndrome->temp); 277c11a50b1SJeff Hugo break; 278c11a50b1SJeff Hugo } 279c11a50b1SJeff Hugo } 280c11a50b1SJeff Hugo 281c11a50b1SJeff Hugo static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg) 282c11a50b1SJeff Hugo { 283c11a50b1SJeff Hugo struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0]; 284c11a50b1SJeff Hugo struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0]; 285c11a50b1SJeff Hugo struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0]; 286c11a50b1SJeff Hugo struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0]; 287c11a50b1SJeff Hugo struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0]; 288c11a50b1SJeff Hugo struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0]; 289c11a50b1SJeff Hugo struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0]; 290c11a50b1SJeff Hugo char *class; 291c11a50b1SJeff Hugo char *level; 292c11a50b1SJeff Hugo 293c11a50b1SJeff Hugo if (msg->magic != MAGIC) { 294c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic); 295c11a50b1SJeff Hugo return; 296c11a50b1SJeff Hugo } 297c11a50b1SJeff Hugo 298c11a50b1SJeff Hugo if (!msg->ver || msg->ver > VERSION) { 299c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver); 300c11a50b1SJeff Hugo return; 301c11a50b1SJeff Hugo } 302c11a50b1SJeff Hugo 303c11a50b1SJeff Hugo if (msg->type != MSG_PUSH) { 304c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n"); 305c11a50b1SJeff Hugo return; 306c11a50b1SJeff Hugo } 307c11a50b1SJeff Hugo 308c11a50b1SJeff Hugo if (msg->len != sizeof(*msg) - HDR_SZ) { 309c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len); 310c11a50b1SJeff Hugo return; 311c11a50b1SJeff Hugo } 312c11a50b1SJeff Hugo 313c11a50b1SJeff Hugo if (msg->err_type >= ERR_TYPE_MAX) { 314c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type); 315c11a50b1SJeff Hugo return; 316c11a50b1SJeff Hugo } 317c11a50b1SJeff Hugo 318c11a50b1SJeff Hugo if (msg->err_type == UE) 319c11a50b1SJeff Hugo level = KERN_ERR; 320c11a50b1SJeff Hugo else 321c11a50b1SJeff Hugo level = KERN_WARNING; 322c11a50b1SJeff Hugo 323c11a50b1SJeff Hugo switch (msg->source) { 324c11a50b1SJeff Hugo case SOC_MEM: 325*c5ae936dSJeff Hugo dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n 0x%llx\n", 326c11a50b1SJeff Hugo err_class_str[msg->err_type], 327c11a50b1SJeff Hugo err_type_str[msg->err_type], 328c11a50b1SJeff Hugo "error from", 329c11a50b1SJeff Hugo err_src_str[msg->source], 330c11a50b1SJeff Hugo msg->err_threshold, 331c11a50b1SJeff Hugo soc_syndrome->error_address[0], 332c11a50b1SJeff Hugo soc_syndrome->error_address[1], 333c11a50b1SJeff Hugo soc_syndrome->error_address[2], 334c11a50b1SJeff Hugo soc_syndrome->error_address[3], 335c11a50b1SJeff Hugo soc_syndrome->error_address[4], 336c11a50b1SJeff Hugo soc_syndrome->error_address[5], 337c11a50b1SJeff Hugo soc_syndrome->error_address[6], 338c11a50b1SJeff Hugo soc_syndrome->error_address[7]); 339c11a50b1SJeff Hugo break; 340c11a50b1SJeff Hugo case PCIE: 341*c5ae936dSJeff Hugo dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n", 342c11a50b1SJeff Hugo err_class_str[msg->err_type], 343c11a50b1SJeff Hugo err_type_str[msg->err_type], 344c11a50b1SJeff Hugo "error from", 345c11a50b1SJeff Hugo err_src_str[msg->source], 346c11a50b1SJeff Hugo msg->err_threshold); 347c11a50b1SJeff Hugo 348c11a50b1SJeff Hugo switch (msg->err_type) { 349c11a50b1SJeff Hugo case CE: 350c11a50b1SJeff Hugo /* 351*c5ae936dSJeff Hugo * Modeled after AER prints. This continues the dev_printk() from a few 352c11a50b1SJeff Hugo * lines up. We reduce duplication of code, but also avoid re-printing the 353c11a50b1SJeff Hugo * PCI device info so that the end result looks uniform to the log user. 354c11a50b1SJeff Hugo */ 355c11a50b1SJeff Hugo printk(KERN_WARNING pr_fmt("Syndrome:\n Bad TLP count %d\n Bad DLLP count %d\n Replay Rollover count %d\n Replay Timeout count %d\n Recv Error count %d\n Internal CE count %d\n"), 356c11a50b1SJeff Hugo pcie_syndrome->bad_tlp, 357c11a50b1SJeff Hugo pcie_syndrome->bad_dllp, 358c11a50b1SJeff Hugo pcie_syndrome->replay_rollover, 359c11a50b1SJeff Hugo pcie_syndrome->replay_timeout, 360c11a50b1SJeff Hugo pcie_syndrome->rx_err, 361c11a50b1SJeff Hugo pcie_syndrome->internal_ce_count); 362c11a50b1SJeff Hugo if (msg->ver > 0x1) 363c11a50b1SJeff Hugo pr_warn(" Power break %s\n", 364c11a50b1SJeff Hugo pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF"); 365c11a50b1SJeff Hugo break; 366c11a50b1SJeff Hugo case UE: 367c11a50b1SJeff Hugo printk(KERN_ERR pr_fmt("Syndrome:\n Index %d\n Address 0x%x\n"), 368c11a50b1SJeff Hugo pcie_syndrome->index, pcie_syndrome->addr); 369c11a50b1SJeff Hugo break; 370c11a50b1SJeff Hugo case UE_NF: 371c11a50b1SJeff Hugo printk(KERN_WARNING pr_fmt("Syndrome:\n FC timeout count %d\n Poisoned TLP count %d\n ECRC error count %d\n Unsupported request count %d\n Completer abort count %d\n Completion timeout count %d\n"), 372c11a50b1SJeff Hugo pcie_syndrome->fc_timeout, 373c11a50b1SJeff Hugo pcie_syndrome->poison_tlp, 374c11a50b1SJeff Hugo pcie_syndrome->ecrc_err, 375c11a50b1SJeff Hugo pcie_syndrome->unsupported_req, 376c11a50b1SJeff Hugo pcie_syndrome->completer_abort, 377c11a50b1SJeff Hugo pcie_syndrome->completion_timeout); 378c11a50b1SJeff Hugo break; 379c11a50b1SJeff Hugo default: 380c11a50b1SJeff Hugo break; 381c11a50b1SJeff Hugo } 382c11a50b1SJeff Hugo break; 383c11a50b1SJeff Hugo case DDR: 384*c5ae936dSJeff Hugo dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n Instance %d\n Count %d\n Data 31_0 0x%x 0x%x\n Data 63_32 0x%x 0x%x\n Data 95_64 0x%x 0x%x\n Data 127_96 0x%x 0x%x\n Parity bits 0x%x\n Address msb 0x%x\n Address lsb 0x%x\n", 385c11a50b1SJeff Hugo err_class_str[msg->err_type], 386c11a50b1SJeff Hugo err_type_str[msg->err_type], 387c11a50b1SJeff Hugo "error from", 388c11a50b1SJeff Hugo err_src_str[msg->source], 389c11a50b1SJeff Hugo msg->err_threshold, 390c11a50b1SJeff Hugo ddr_syndrome->instance, 391c11a50b1SJeff Hugo ddr_syndrome->count, 392c11a50b1SJeff Hugo ddr_syndrome->data_31_0[1], 393c11a50b1SJeff Hugo ddr_syndrome->data_31_0[0], 394c11a50b1SJeff Hugo ddr_syndrome->data_63_32[1], 395c11a50b1SJeff Hugo ddr_syndrome->data_63_32[0], 396c11a50b1SJeff Hugo ddr_syndrome->data_95_64[1], 397c11a50b1SJeff Hugo ddr_syndrome->data_95_64[0], 398c11a50b1SJeff Hugo ddr_syndrome->data_127_96[1], 399c11a50b1SJeff Hugo ddr_syndrome->data_127_96[0], 400c11a50b1SJeff Hugo ddr_syndrome->parity_bits, 401c11a50b1SJeff Hugo ddr_syndrome->addr_msb, 402c11a50b1SJeff Hugo ddr_syndrome->addr_lsb); 403c11a50b1SJeff Hugo break; 404c11a50b1SJeff Hugo case SYS_BUS1: 405*c5ae936dSJeff Hugo dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n %s\n err_type %d\n address0 0x%x\n address1 0x%x\n address2 0x%x\n address3 0x%x\n address4 0x%x\n address5 0x%x\n address6 0x%x\n address7 0x%x\n", 406c11a50b1SJeff Hugo err_class_str[msg->err_type], 407c11a50b1SJeff Hugo err_type_str[msg->err_type], 408c11a50b1SJeff Hugo "error from", 409c11a50b1SJeff Hugo err_src_str[msg->source], 410c11a50b1SJeff Hugo msg->err_threshold, 411c11a50b1SJeff Hugo sysbus1_syndrome->instance, 412c11a50b1SJeff Hugo sysbus1_syndrome->slave ? "Slave" : "Master", 413c11a50b1SJeff Hugo sysbus1_syndrome->err_type, 414c11a50b1SJeff Hugo sysbus1_syndrome->addr[0], 415c11a50b1SJeff Hugo sysbus1_syndrome->addr[1], 416c11a50b1SJeff Hugo sysbus1_syndrome->addr[2], 417c11a50b1SJeff Hugo sysbus1_syndrome->addr[3], 418c11a50b1SJeff Hugo sysbus1_syndrome->addr[4], 419c11a50b1SJeff Hugo sysbus1_syndrome->addr[5], 420c11a50b1SJeff Hugo sysbus1_syndrome->addr[6], 421c11a50b1SJeff Hugo sysbus1_syndrome->addr[7]); 422c11a50b1SJeff Hugo break; 423c11a50b1SJeff Hugo case SYS_BUS2: 424*c5ae936dSJeff Hugo dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n instance %d\n valid %d\n word error %d\n non-secure %d\n opc %d\n error code %d\n transaction type %d\n address space %d\n operation type %d\n len %d\n redirect %d\n path %d\n ext_id %d\n lsb2 %d\n msb2 %d\n lsb3 %d\n msb3 %d\n", 425c11a50b1SJeff Hugo err_class_str[msg->err_type], 426c11a50b1SJeff Hugo err_type_str[msg->err_type], 427c11a50b1SJeff Hugo "error from", 428c11a50b1SJeff Hugo err_src_str[msg->source], 429c11a50b1SJeff Hugo msg->err_threshold, 430c11a50b1SJeff Hugo sysbus2_syndrome->instance, 431c11a50b1SJeff Hugo sysbus2_syndrome->valid, 432c11a50b1SJeff Hugo sysbus2_syndrome->word_error, 433c11a50b1SJeff Hugo sysbus2_syndrome->non_secure, 434c11a50b1SJeff Hugo sysbus2_syndrome->opc, 435c11a50b1SJeff Hugo sysbus2_syndrome->error_code, 436c11a50b1SJeff Hugo sysbus2_syndrome->trans_type, 437c11a50b1SJeff Hugo sysbus2_syndrome->addr_space, 438c11a50b1SJeff Hugo sysbus2_syndrome->op_type, 439c11a50b1SJeff Hugo sysbus2_syndrome->len, 440c11a50b1SJeff Hugo sysbus2_syndrome->redirect, 441c11a50b1SJeff Hugo sysbus2_syndrome->path, 442c11a50b1SJeff Hugo sysbus2_syndrome->ext_id, 443c11a50b1SJeff Hugo sysbus2_syndrome->lsb2, 444c11a50b1SJeff Hugo sysbus2_syndrome->msb2, 445c11a50b1SJeff Hugo sysbus2_syndrome->lsb3, 446c11a50b1SJeff Hugo sysbus2_syndrome->msb3); 447c11a50b1SJeff Hugo break; 448c11a50b1SJeff Hugo case NSP_MEM: 449*c5ae936dSJeff Hugo dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n NSP ID %d\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n 0x%x\n", 450c11a50b1SJeff Hugo err_class_str[msg->err_type], 451c11a50b1SJeff Hugo err_type_str[msg->err_type], 452c11a50b1SJeff Hugo "error from", 453c11a50b1SJeff Hugo err_src_str[msg->source], 454c11a50b1SJeff Hugo msg->err_threshold, 455c11a50b1SJeff Hugo nsp_syndrome->nsp_id, 456c11a50b1SJeff Hugo nsp_syndrome->error_address[0], 457c11a50b1SJeff Hugo nsp_syndrome->error_address[1], 458c11a50b1SJeff Hugo nsp_syndrome->error_address[2], 459c11a50b1SJeff Hugo nsp_syndrome->error_address[3], 460c11a50b1SJeff Hugo nsp_syndrome->error_address[4], 461c11a50b1SJeff Hugo nsp_syndrome->error_address[5], 462c11a50b1SJeff Hugo nsp_syndrome->error_address[6], 463c11a50b1SJeff Hugo nsp_syndrome->error_address[7]); 464c11a50b1SJeff Hugo break; 465c11a50b1SJeff Hugo case TSENS: 466c11a50b1SJeff Hugo if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) { 467c11a50b1SJeff Hugo pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n", 468c11a50b1SJeff Hugo tsens_syndrome->threshold_type); 469c11a50b1SJeff Hugo break; 470c11a50b1SJeff Hugo } 471c11a50b1SJeff Hugo 472c11a50b1SJeff Hugo if (msg->err_type) 473c11a50b1SJeff Hugo class = "Fatal"; 474c11a50b1SJeff Hugo else if (tsens_syndrome->threshold_type) 475c11a50b1SJeff Hugo class = "Critical"; 476c11a50b1SJeff Hugo else 477c11a50b1SJeff Hugo class = "Warning"; 478c11a50b1SJeff Hugo 479*c5ae936dSJeff Hugo dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n %s threshold\n %d deg C\n", 480c11a50b1SJeff Hugo class, 481c11a50b1SJeff Hugo err_type_str[msg->err_type], 482c11a50b1SJeff Hugo "error from", 483c11a50b1SJeff Hugo err_src_str[msg->source], 484c11a50b1SJeff Hugo msg->err_threshold, 485c11a50b1SJeff Hugo threshold_type_str[tsens_syndrome->threshold_type], 486c11a50b1SJeff Hugo tsens_syndrome->temp); 487c11a50b1SJeff Hugo break; 488c11a50b1SJeff Hugo } 489c11a50b1SJeff Hugo 490c11a50b1SJeff Hugo /* Uncorrectable errors are fatal */ 491c11a50b1SJeff Hugo if (msg->err_type == UE) 492c11a50b1SJeff Hugo mhi_soc_reset(qdev->mhi_cntrl); 493c11a50b1SJeff Hugo 494c11a50b1SJeff Hugo switch (msg->err_type) { 495c11a50b1SJeff Hugo case CE: 496c11a50b1SJeff Hugo if (qdev->ce_count != UINT_MAX) 497c11a50b1SJeff Hugo qdev->ce_count++; 498c11a50b1SJeff Hugo break; 499c11a50b1SJeff Hugo case UE: 500c11a50b1SJeff Hugo if (qdev->ce_count != UINT_MAX) 501c11a50b1SJeff Hugo qdev->ue_count++; 502c11a50b1SJeff Hugo break; 503c11a50b1SJeff Hugo case UE_NF: 504c11a50b1SJeff Hugo if (qdev->ce_count != UINT_MAX) 505c11a50b1SJeff Hugo qdev->ue_nf_count++; 506c11a50b1SJeff Hugo break; 507c11a50b1SJeff Hugo default: 508c11a50b1SJeff Hugo /* not possible */ 509c11a50b1SJeff Hugo break; 510c11a50b1SJeff Hugo } 511c11a50b1SJeff Hugo } 512c11a50b1SJeff Hugo 513c11a50b1SJeff Hugo static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf) 514c11a50b1SJeff Hugo { 515c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); 516c11a50b1SJeff Hugo 517c11a50b1SJeff Hugo return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ce_count); 518c11a50b1SJeff Hugo } 519c11a50b1SJeff Hugo 520c11a50b1SJeff Hugo static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf) 521c11a50b1SJeff Hugo { 522c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); 523c11a50b1SJeff Hugo 524c11a50b1SJeff Hugo return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_count); 525c11a50b1SJeff Hugo } 526c11a50b1SJeff Hugo 527c11a50b1SJeff Hugo static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf) 528c11a50b1SJeff Hugo { 529c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev)); 530c11a50b1SJeff Hugo 531c11a50b1SJeff Hugo return snprintf(buf, PAGE_SIZE, "%d\n", qdev->ue_nf_count); 532c11a50b1SJeff Hugo } 533c11a50b1SJeff Hugo 534c11a50b1SJeff Hugo static DEVICE_ATTR_RO(ce_count); 535c11a50b1SJeff Hugo static DEVICE_ATTR_RO(ue_count); 536c11a50b1SJeff Hugo static DEVICE_ATTR_RO(ue_nonfatal_count); 537c11a50b1SJeff Hugo 538c11a50b1SJeff Hugo static struct attribute *ras_attrs[] = { 539c11a50b1SJeff Hugo &dev_attr_ce_count.attr, 540c11a50b1SJeff Hugo &dev_attr_ue_count.attr, 541c11a50b1SJeff Hugo &dev_attr_ue_nonfatal_count.attr, 542c11a50b1SJeff Hugo NULL, 543c11a50b1SJeff Hugo }; 544c11a50b1SJeff Hugo 545c11a50b1SJeff Hugo static struct attribute_group ras_group = { 546c11a50b1SJeff Hugo .attrs = ras_attrs, 547c11a50b1SJeff Hugo }; 548c11a50b1SJeff Hugo 549c11a50b1SJeff Hugo static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) 550c11a50b1SJeff Hugo { 551c11a50b1SJeff Hugo struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev)); 552c11a50b1SJeff Hugo struct ras_data *resp; 553c11a50b1SJeff Hugo int ret; 554c11a50b1SJeff Hugo 555c11a50b1SJeff Hugo ret = mhi_prepare_for_transfer(mhi_dev); 556c11a50b1SJeff Hugo if (ret) 557c11a50b1SJeff Hugo return ret; 558c11a50b1SJeff Hugo 559c11a50b1SJeff Hugo resp = kzalloc(sizeof(*resp), GFP_KERNEL); 560c11a50b1SJeff Hugo if (!resp) { 561c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 562c11a50b1SJeff Hugo return -ENOMEM; 563c11a50b1SJeff Hugo } 564c11a50b1SJeff Hugo 565c11a50b1SJeff Hugo ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT); 566c11a50b1SJeff Hugo if (ret) { 567c11a50b1SJeff Hugo kfree(resp); 568c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 569c11a50b1SJeff Hugo return ret; 570c11a50b1SJeff Hugo } 571c11a50b1SJeff Hugo 572c11a50b1SJeff Hugo ret = device_add_group(&qdev->pdev->dev, &ras_group); 573c11a50b1SJeff Hugo if (ret) { 574c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 575c11a50b1SJeff Hugo pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret); 576c11a50b1SJeff Hugo return ret; 577c11a50b1SJeff Hugo } 578c11a50b1SJeff Hugo 579c11a50b1SJeff Hugo dev_set_drvdata(&mhi_dev->dev, qdev); 580c11a50b1SJeff Hugo qdev->ras_ch = mhi_dev; 581c11a50b1SJeff Hugo 582c11a50b1SJeff Hugo return ret; 583c11a50b1SJeff Hugo } 584c11a50b1SJeff Hugo 585c11a50b1SJeff Hugo static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev) 586c11a50b1SJeff Hugo { 587c11a50b1SJeff Hugo struct qaic_device *qdev; 588c11a50b1SJeff Hugo 589c11a50b1SJeff Hugo qdev = dev_get_drvdata(&mhi_dev->dev); 590c11a50b1SJeff Hugo qdev->ras_ch = NULL; 591c11a50b1SJeff Hugo device_remove_group(&qdev->pdev->dev, &ras_group); 592c11a50b1SJeff Hugo mhi_unprepare_from_transfer(mhi_dev); 593c11a50b1SJeff Hugo } 594c11a50b1SJeff Hugo 595c11a50b1SJeff Hugo static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) {} 596c11a50b1SJeff Hugo 597c11a50b1SJeff Hugo static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result) 598c11a50b1SJeff Hugo { 599c11a50b1SJeff Hugo struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev); 600c11a50b1SJeff Hugo struct ras_data *msg = mhi_result->buf_addr; 601c11a50b1SJeff Hugo int ret; 602c11a50b1SJeff Hugo 603c11a50b1SJeff Hugo if (mhi_result->transaction_status) { 604c11a50b1SJeff Hugo kfree(msg); 605c11a50b1SJeff Hugo return; 606c11a50b1SJeff Hugo } 607c11a50b1SJeff Hugo 608c11a50b1SJeff Hugo ras_msg_to_cpu(msg); 609c11a50b1SJeff Hugo decode_ras_msg(qdev, msg); 610c11a50b1SJeff Hugo 611c11a50b1SJeff Hugo ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT); 612c11a50b1SJeff Hugo if (ret) { 613c11a50b1SJeff Hugo dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret); 614c11a50b1SJeff Hugo kfree(msg); 615c11a50b1SJeff Hugo } 616c11a50b1SJeff Hugo } 617c11a50b1SJeff Hugo 618c11a50b1SJeff Hugo static const struct mhi_device_id qaic_ras_mhi_match_table[] = { 619c11a50b1SJeff Hugo { .chan = "QAIC_STATUS", }, 620c11a50b1SJeff Hugo {}, 621c11a50b1SJeff Hugo }; 622c11a50b1SJeff Hugo 623c11a50b1SJeff Hugo static struct mhi_driver qaic_ras_mhi_driver = { 624c11a50b1SJeff Hugo .id_table = qaic_ras_mhi_match_table, 625c11a50b1SJeff Hugo .remove = qaic_ras_mhi_remove, 626c11a50b1SJeff Hugo .probe = qaic_ras_mhi_probe, 627c11a50b1SJeff Hugo .ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb, 628c11a50b1SJeff Hugo .dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb, 629c11a50b1SJeff Hugo .driver = { 630c11a50b1SJeff Hugo .name = "qaic_ras", 631c11a50b1SJeff Hugo }, 632c11a50b1SJeff Hugo }; 633c11a50b1SJeff Hugo 634c11a50b1SJeff Hugo int qaic_ras_register(void) 635c11a50b1SJeff Hugo { 636c11a50b1SJeff Hugo return mhi_driver_register(&qaic_ras_mhi_driver); 637c11a50b1SJeff Hugo } 638c11a50b1SJeff Hugo 639c11a50b1SJeff Hugo void qaic_ras_unregister(void) 640c11a50b1SJeff Hugo { 641c11a50b1SJeff Hugo mhi_driver_unregister(&qaic_ras_mhi_driver); 642c11a50b1SJeff Hugo } 643