xref: /linux/drivers/cxl/core/ras.c (revision 26fd9f7b7ff3794c5de0e6ae538cead53118b4c3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2025 AMD Corporation. All rights reserved. */
3 
4 #include <linux/pci.h>
5 #include <linux/aer.h>
6 #include <cxl/event.h>
7 #include <cxlmem.h>
8 #include "trace.h"
9 
cxl_cper_trace_corr_port_prot_err(struct pci_dev * pdev,struct cxl_ras_capability_regs ras_cap)10 static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev,
11 					      struct cxl_ras_capability_regs ras_cap)
12 {
13 	u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
14 
15 	trace_cxl_port_aer_correctable_error(&pdev->dev, status);
16 }
17 
cxl_cper_trace_uncorr_port_prot_err(struct pci_dev * pdev,struct cxl_ras_capability_regs ras_cap)18 static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev,
19 						struct cxl_ras_capability_regs ras_cap)
20 {
21 	u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
22 	u32 fe;
23 
24 	if (hweight32(status) > 1)
25 		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
26 				   ras_cap.cap_control));
27 	else
28 		fe = status;
29 
30 	trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe,
31 					       ras_cap.header_log);
32 }
33 
cxl_cper_trace_corr_prot_err(struct cxl_memdev * cxlmd,struct cxl_ras_capability_regs ras_cap)34 static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd,
35 					 struct cxl_ras_capability_regs ras_cap)
36 {
37 	u32 status = ras_cap.cor_status & ~ras_cap.cor_mask;
38 
39 	trace_cxl_aer_correctable_error(cxlmd, status);
40 }
41 
42 static void
cxl_cper_trace_uncorr_prot_err(struct cxl_memdev * cxlmd,struct cxl_ras_capability_regs ras_cap)43 cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd,
44 			       struct cxl_ras_capability_regs ras_cap)
45 {
46 	u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask;
47 	u32 fe;
48 
49 	if (hweight32(status) > 1)
50 		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
51 				   ras_cap.cap_control));
52 	else
53 		fe = status;
54 
55 	trace_cxl_aer_uncorrectable_error(cxlmd, status, fe,
56 					  ras_cap.header_log);
57 }
58 
match_memdev_by_parent(struct device * dev,const void * uport)59 static int match_memdev_by_parent(struct device *dev, const void *uport)
60 {
61 	if (is_cxl_memdev(dev) && dev->parent == uport)
62 		return 1;
63 	return 0;
64 }
65 
/*
 * Route one CPER-reported CXL protocol error to the matching trace event.
 *
 * Errors against root/upstream/downstream ports are traced directly on the
 * PCI device; errors against other device types are traced on the
 * cxl_memdev found on the CXL bus whose parent is the PCI device.
 */
static void cxl_cper_handle_prot_err(struct cxl_cper_prot_err_work_data *data)
{
	unsigned int devfn = PCI_DEVFN(data->prot_err.agent_addr.device,
				       data->prot_err.agent_addr.function);
	/* Reference from pci_get_*() is dropped automatically at scope exit */
	struct pci_dev *pdev __free(pci_dev_put) =
		pci_get_domain_bus_and_slot(data->prot_err.agent_addr.segment,
					    data->prot_err.agent_addr.bus,
					    devfn);
	struct cxl_memdev *cxlmd;
	int port_type;

	if (!pdev)
		return;

	port_type = pci_pcie_type(pdev);
	if (port_type == PCI_EXP_TYPE_ROOT_PORT ||
	    port_type == PCI_EXP_TYPE_DOWNSTREAM ||
	    port_type == PCI_EXP_TYPE_UPSTREAM) {
		if (data->severity == AER_CORRECTABLE)
			cxl_cper_trace_corr_port_prot_err(pdev, data->ras_cap);
		else
			cxl_cper_trace_uncorr_port_prot_err(pdev, data->ras_cap);

		return;
	}

	/*
	 * Hold the device lock while checking for a bound driver so the
	 * driver cannot unbind between the check and the trace calls.
	 */
	guard(device)(&pdev->dev);
	if (!pdev->dev.driver)
		return;

	/* bus_find_device() takes a reference; dropped at scope exit */
	struct device *mem_dev __free(put_device) = bus_find_device(
		&cxl_bus_type, NULL, pdev, match_memdev_by_parent);
	if (!mem_dev)
		return;

	cxlmd = to_cxl_memdev(mem_dev);
	if (data->severity == AER_CORRECTABLE)
		cxl_cper_trace_corr_prot_err(cxlmd, data->ras_cap);
	else
		cxl_cper_trace_uncorr_prot_err(cxlmd, data->ras_cap);
}
107 
cxl_cper_prot_err_work_fn(struct work_struct * work)108 static void cxl_cper_prot_err_work_fn(struct work_struct *work)
109 {
110 	struct cxl_cper_prot_err_work_data wd;
111 
112 	while (cxl_cper_prot_err_kfifo_get(&wd))
113 		cxl_cper_handle_prot_err(&wd);
114 }
115 static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn);
116 
cxl_ras_init(void)117 int cxl_ras_init(void)
118 {
119 	return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work);
120 }
121 
/*
 * Tear down CPER protocol-error handling.  Unregister first so no new
 * work is scheduled, then flush any work already queued.
 */
void cxl_ras_exit(void)
{
	cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work);
	cancel_work_sync(&cxl_cper_prot_err_work);
}
127