xref: /linux/drivers/accel/qaic/qaic_ras.c (revision bed29492d413349e5b13f21936655064cdb63c91)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 /* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
4 /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
5 
6 #include <asm/byteorder.h>
7 #include <linux/device.h>
8 #include <linux/kernel.h>
9 #include <linux/mhi.h>
10 
11 #include "qaic.h"
12 #include "qaic_ras.h"
13 
/* Value ras_data.magic must hold for a message to be decoded */
#define MAGIC		0x55AA
/* Highest ras_data.ver accepted by decode_ras_msg(); version 0 is rejected */
#define VERSION		0x2
/* Size in bytes of the ras_data header fields (magic through len) */
#define HDR_SZ		12
/* Number of temperature threshold levels named in threshold_type_str[] */
#define NUM_TEMP_LVL	3
/* pcie_syndrome.flag bit reporting a power break (low power) event */
#define POWER_BREAK	BIT(0)
19 
/* Values of ras_data.type. decode_ras_msg() drops everything but MSG_PUSH. */
enum msg_type {
	MSG_PUSH, /* async push from device */
	MSG_REQ,  /* sync request to device */
	MSG_RESP, /* sync response from device */
};
25 
/*
 * Values of ras_data.err_type. They also index err_type_str[] and
 * err_class_str[]; ERR_TYPE_MAX is the first value rejected as invalid.
 */
enum err_type {
	CE,	/* correctable error */
	UE,	/* uncorrectable error */
	UE_NF,	/* uncorrectable error that is non-fatal, expect a disruption */
	ERR_TYPE_MAX,
};
32 
/* Text for the "Description:" field of a RAS log entry, indexed by enum err_type */
static const char * const err_type_str[] = {
	[CE]    = "Correctable",
	[UE]    = "Uncorrectable",
	[UE_NF] = "Uncorrectable Non-Fatal",
};
38 
/*
 * Text for the "Class:" field of a RAS log entry, indexed by enum err_type.
 * Temperature sensor events pick their class separately in decode_ras_msg().
 */
static const char * const err_class_str[] = {
	[CE]    = "Warning",
	[UE]    = "Fatal",
	[UE_NF] = "Warning",
};
44 
/*
 * Values of ras_data.source. The source selects which *_syndrome layout the
 * ras_data.syndrome bytes are interpreted as, and indexes err_src_str[].
 */
enum err_source {
	SOC_MEM,
	PCIE,
	DDR,
	SYS_BUS1,
	SYS_BUS2,
	NSP_MEM,
	TSENS,
};
54 
/* Human-readable name of each error source, indexed by enum err_source */
static const char * const err_src_str[TSENS + 1] = {
	[SOC_MEM]	= "SoC Memory",
	[PCIE]		= "PCIE",
	[DDR]		= "DDR",
	[SYS_BUS1]	= "System Bus source 1",
	[SYS_BUS2]	= "System Bus source 2",
	[NSP_MEM]	= "NSP Memory",
	[TSENS]		= "Temperature Sensors",
};
64 
/*
 * Layout of one RAS message as received from the device.
 *
 * Multi-byte fields are converted from little-endian to CPU order in place by
 * ras_msg_to_cpu() before the message is decoded. The header (magic through
 * len) is HDR_SZ bytes; decode_ras_msg() requires len to equal the size of
 * everything after it.
 */
struct ras_data {
	/* header start */
	/* Magic number to validate the message */
	u16 magic;
	/* RAS version number */
	u16 ver;
	u32 seq_num;
	/* RAS message type */
	u8  type;
	u8  id;
	/* Size of RAS message without the header in byte */
	u16 len;
	/* header end */
	s32 result;
	/*
	 * Error source
	 * 0 : SoC Memory
	 * 1 : PCIE
	 * 2 : DDR
	 * 3 : System Bus source 1
	 * 4 : System Bus source 2
	 * 5 : NSP Memory
	 * 6 : Temperature Sensors
	 */
	u32 source;
	/*
	 * Stores the error type, there are three types of error in RAS
	 * 0 : correctable error (CE)
	 * 1 : uncorrectable error (UE)
	 * 2 : uncorrectable error that is non-fatal (UE_NF)
	 */
	u32 err_type;
	/* Printed as "Error Threshold for this report" in the log entry */
	u32 err_threshold;
	u32 ce_count;
	u32 ue_count;
	u32 intr_num;
	/* Data specific to error source; overlaid with one of the *_syndrome structs */
	u8  syndrome[64];
} __packed;
104 
/* Layout of ras_data.syndrome when source is SOC_MEM */
struct soc_mem_syndrome {
	u64 error_address[8];
} __packed;
108 
/* Layout of ras_data.syndrome when source is NSP_MEM */
struct nsp_mem_syndrome {
	u32 error_address[8];
	/* Logged as "NSP ID" */
	u8 nsp_id;
} __packed;
113 
/*
 * Layout of ras_data.syndrome when source is DDR. Each data_* pair is logged
 * with element [1] first, then element [0].
 */
struct ddr_syndrome {
	u32 count;
	/* Byte-swapped by ras_msg_to_cpu() but not included in the log entry */
	u32 irq_status;
	u32 data_31_0[2];
	u32 data_63_32[2];
	u32 data_95_64[2];
	u32 data_127_96[2];
	u32 addr_lsb;
	u16 addr_msb;
	u16 parity_bits;
	u16 instance;
	/* Byte-swapped by ras_msg_to_cpu() but not included in the log entry */
	u16 err_type;
} __packed;
127 
/* Layout of ras_data.syndrome when source is TSENS */
struct tsens_syndrome {
	/* Index into threshold_type_str[]; must be below NUM_TEMP_LVL */
	u32 threshold_type;
	/* Logged as "<temp> deg C" */
	s32 temp;
} __packed;
132 
/* Layout of ras_data.syndrome when source is SYS_BUS1 */
struct sysbus1_syndrome {
	/* Non-zero is logged as "Slave", zero as "Master" */
	u32 slave;
	u32 err_type;
	u16 addr[8];
	u8  instance;
} __packed;
139 
/*
 * Layout of ras_data.syndrome when source is SYS_BUS2. Every field is logged
 * as a plain decimal value by decode_ras_msg().
 */
struct sysbus2_syndrome {
	u32 lsb3;
	u32 msb3;
	u32 lsb2;
	u32 msb2;
	u32 ext_id;
	u16 path;
	u16 op_type;
	u16 len;
	u16 redirect;
	u8  valid;
	u8  word_error;
	u8  non_secure;
	u8  opc;
	u8  error_code;
	u8  trans_type;
	u8  addr_space;
	u8  instance;
} __packed;
159 
/*
 * Layout of ras_data.syndrome when source is PCIE. Which group of fields is
 * logged depends on ras_data.err_type (CE, UE_NF or UE).
 */
struct pcie_syndrome {
	/* CE info */
	u32 bad_tlp;
	u32 bad_dllp;
	u32 replay_rollover;
	u32 replay_timeout;
	u32 rx_err;
	u32 internal_ce_count;
	/* UE_NF info */
	u32 fc_timeout;
	u32 poison_tlp;
	u32 ecrc_err;
	u32 unsupported_req;
	u32 completer_abort;
	u32 completion_timeout;
	/* UE info */
	u32 addr;
	u8  index;
	/*
	 * Flag to indicate specific event of PCIe
	 * BIT(0): Power break (low power)
	 * BIT(1) to BIT(7): Reserved
	 *
	 * Only reported for CE messages with a version above 1.
	 */
	u8 flag;
} __packed;
185 
/* Name of each temperature threshold, indexed by tsens_syndrome.threshold_type */
static const char * const threshold_type_str[NUM_TEMP_LVL] = {
	[0] = "lower",
	[1] = "upper",
	[2] = "critical",
};
191 
192 static void ras_msg_to_cpu(struct ras_data *msg)
193 {
194 	struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
195 	struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
196 	struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
197 	struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
198 	struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
199 	struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
200 	struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
201 	int i;
202 
203 	le16_to_cpus(&msg->magic);
204 	le16_to_cpus(&msg->ver);
205 	le32_to_cpus(&msg->seq_num);
206 	le16_to_cpus(&msg->len);
207 	le32_to_cpus(&msg->result);
208 	le32_to_cpus(&msg->source);
209 	le32_to_cpus(&msg->err_type);
210 	le32_to_cpus(&msg->err_threshold);
211 	le32_to_cpus(&msg->ce_count);
212 	le32_to_cpus(&msg->ue_count);
213 	le32_to_cpus(&msg->intr_num);
214 
215 	switch (msg->source) {
216 	case SOC_MEM:
217 		for (i = 0; i < 8; i++)
218 			le64_to_cpus(&soc_syndrome->error_address[i]);
219 		break;
220 	case PCIE:
221 		le32_to_cpus(&pcie_syndrome->bad_tlp);
222 		le32_to_cpus(&pcie_syndrome->bad_dllp);
223 		le32_to_cpus(&pcie_syndrome->replay_rollover);
224 		le32_to_cpus(&pcie_syndrome->replay_timeout);
225 		le32_to_cpus(&pcie_syndrome->rx_err);
226 		le32_to_cpus(&pcie_syndrome->internal_ce_count);
227 		le32_to_cpus(&pcie_syndrome->fc_timeout);
228 		le32_to_cpus(&pcie_syndrome->poison_tlp);
229 		le32_to_cpus(&pcie_syndrome->ecrc_err);
230 		le32_to_cpus(&pcie_syndrome->unsupported_req);
231 		le32_to_cpus(&pcie_syndrome->completer_abort);
232 		le32_to_cpus(&pcie_syndrome->completion_timeout);
233 		le32_to_cpus(&pcie_syndrome->addr);
234 		break;
235 	case DDR:
236 		le16_to_cpus(&ddr_syndrome->instance);
237 		le16_to_cpus(&ddr_syndrome->err_type);
238 		le32_to_cpus(&ddr_syndrome->count);
239 		le32_to_cpus(&ddr_syndrome->irq_status);
240 		le32_to_cpus(&ddr_syndrome->data_31_0[0]);
241 		le32_to_cpus(&ddr_syndrome->data_31_0[1]);
242 		le32_to_cpus(&ddr_syndrome->data_63_32[0]);
243 		le32_to_cpus(&ddr_syndrome->data_63_32[1]);
244 		le32_to_cpus(&ddr_syndrome->data_95_64[0]);
245 		le32_to_cpus(&ddr_syndrome->data_95_64[1]);
246 		le32_to_cpus(&ddr_syndrome->data_127_96[0]);
247 		le32_to_cpus(&ddr_syndrome->data_127_96[1]);
248 		le16_to_cpus(&ddr_syndrome->parity_bits);
249 		le16_to_cpus(&ddr_syndrome->addr_msb);
250 		le32_to_cpus(&ddr_syndrome->addr_lsb);
251 		break;
252 	case SYS_BUS1:
253 		le32_to_cpus(&sysbus1_syndrome->slave);
254 		le32_to_cpus(&sysbus1_syndrome->err_type);
255 		for (i = 0; i < 8; i++)
256 			le16_to_cpus(&sysbus1_syndrome->addr[i]);
257 		break;
258 	case SYS_BUS2:
259 		le16_to_cpus(&sysbus2_syndrome->op_type);
260 		le16_to_cpus(&sysbus2_syndrome->len);
261 		le16_to_cpus(&sysbus2_syndrome->redirect);
262 		le16_to_cpus(&sysbus2_syndrome->path);
263 		le32_to_cpus(&sysbus2_syndrome->ext_id);
264 		le32_to_cpus(&sysbus2_syndrome->lsb2);
265 		le32_to_cpus(&sysbus2_syndrome->msb2);
266 		le32_to_cpus(&sysbus2_syndrome->lsb3);
267 		le32_to_cpus(&sysbus2_syndrome->msb3);
268 		break;
269 	case NSP_MEM:
270 		for (i = 0; i < 8; i++)
271 			le32_to_cpus(&nsp_syndrome->error_address[i]);
272 		break;
273 	case TSENS:
274 		le32_to_cpus(&tsens_syndrome->threshold_type);
275 		le32_to_cpus(&tsens_syndrome->temp);
276 		break;
277 	}
278 }
279 
280 static void decode_ras_msg(struct qaic_device *qdev, struct ras_data *msg)
281 {
282 	struct sysbus1_syndrome *sysbus1_syndrome = (struct sysbus1_syndrome *)&msg->syndrome[0];
283 	struct sysbus2_syndrome *sysbus2_syndrome = (struct sysbus2_syndrome *)&msg->syndrome[0];
284 	struct soc_mem_syndrome *soc_syndrome = (struct soc_mem_syndrome *)&msg->syndrome[0];
285 	struct nsp_mem_syndrome *nsp_syndrome = (struct nsp_mem_syndrome *)&msg->syndrome[0];
286 	struct tsens_syndrome *tsens_syndrome = (struct tsens_syndrome *)&msg->syndrome[0];
287 	struct pcie_syndrome *pcie_syndrome = (struct pcie_syndrome *)&msg->syndrome[0];
288 	struct ddr_syndrome *ddr_syndrome = (struct ddr_syndrome *)&msg->syndrome[0];
289 	char *class;
290 	char *level;
291 
292 	if (msg->magic != MAGIC) {
293 		pci_warn(qdev->pdev, "Dropping RAS message with invalid magic %x\n", msg->magic);
294 		return;
295 	}
296 
297 	if (!msg->ver || msg->ver > VERSION) {
298 		pci_warn(qdev->pdev, "Dropping RAS message with invalid version %d\n", msg->ver);
299 		return;
300 	}
301 
302 	if (msg->type != MSG_PUSH) {
303 		pci_warn(qdev->pdev, "Dropping non-PUSH RAS message\n");
304 		return;
305 	}
306 
307 	if (msg->len != sizeof(*msg) - HDR_SZ) {
308 		pci_warn(qdev->pdev, "Dropping RAS message with invalid len %d\n", msg->len);
309 		return;
310 	}
311 
312 	if (msg->err_type >= ERR_TYPE_MAX) {
313 		pci_warn(qdev->pdev, "Dropping RAS message with err type %d\n", msg->err_type);
314 		return;
315 	}
316 
317 	if (msg->err_type == UE)
318 		level = KERN_ERR;
319 	else
320 		level = KERN_WARNING;
321 
322 	switch (msg->source) {
323 	case SOC_MEM:
324 		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n    0x%llx\n",
325 			   err_class_str[msg->err_type],
326 			   err_type_str[msg->err_type],
327 			   "error from",
328 			   err_src_str[msg->source],
329 			   msg->err_threshold,
330 			   soc_syndrome->error_address[0],
331 			   soc_syndrome->error_address[1],
332 			   soc_syndrome->error_address[2],
333 			   soc_syndrome->error_address[3],
334 			   soc_syndrome->error_address[4],
335 			   soc_syndrome->error_address[5],
336 			   soc_syndrome->error_address[6],
337 			   soc_syndrome->error_address[7]);
338 		break;
339 	case PCIE:
340 		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\n",
341 			   err_class_str[msg->err_type],
342 			   err_type_str[msg->err_type],
343 			   "error from",
344 			   err_src_str[msg->source],
345 			   msg->err_threshold);
346 
347 		switch (msg->err_type) {
348 		case CE:
349 			/*
350 			 * Modeled after AER prints. This continues the dev_printk() from a few
351 			 * lines up. We reduce duplication of code, but also avoid re-printing the
352 			 * PCI device info so that the end result looks uniform to the log user.
353 			 */
354 			printk(KERN_WARNING pr_fmt("Syndrome:\n    Bad TLP count %d\n    Bad DLLP count %d\n    Replay Rollover count %d\n    Replay Timeout count %d\n    Recv Error count %d\n    Internal CE count %d\n"),
355 			       pcie_syndrome->bad_tlp,
356 			       pcie_syndrome->bad_dllp,
357 			       pcie_syndrome->replay_rollover,
358 			       pcie_syndrome->replay_timeout,
359 			       pcie_syndrome->rx_err,
360 			       pcie_syndrome->internal_ce_count);
361 			if (msg->ver > 0x1)
362 				pr_warn("    Power break %s\n",
363 					pcie_syndrome->flag & POWER_BREAK ? "ON" : "OFF");
364 			break;
365 		case UE:
366 			printk(KERN_ERR pr_fmt("Syndrome:\n    Index %d\n    Address 0x%x\n"),
367 			       pcie_syndrome->index, pcie_syndrome->addr);
368 			break;
369 		case UE_NF:
370 			printk(KERN_WARNING pr_fmt("Syndrome:\n    FC timeout count %d\n    Poisoned TLP count %d\n    ECRC error count %d\n    Unsupported request count %d\n    Completer abort count %d\n    Completion timeout count %d\n"),
371 			       pcie_syndrome->fc_timeout,
372 			       pcie_syndrome->poison_tlp,
373 			       pcie_syndrome->ecrc_err,
374 			       pcie_syndrome->unsupported_req,
375 			       pcie_syndrome->completer_abort,
376 			       pcie_syndrome->completion_timeout);
377 			break;
378 		default:
379 			break;
380 		}
381 		break;
382 	case DDR:
383 		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    Instance %d\n    Count %d\n    Data 31_0 0x%x 0x%x\n    Data 63_32 0x%x 0x%x\n    Data 95_64 0x%x 0x%x\n    Data 127_96 0x%x 0x%x\n    Parity bits 0x%x\n    Address msb 0x%x\n    Address lsb 0x%x\n",
384 			   err_class_str[msg->err_type],
385 			   err_type_str[msg->err_type],
386 			   "error from",
387 			   err_src_str[msg->source],
388 			   msg->err_threshold,
389 			   ddr_syndrome->instance,
390 			   ddr_syndrome->count,
391 			   ddr_syndrome->data_31_0[1],
392 			   ddr_syndrome->data_31_0[0],
393 			   ddr_syndrome->data_63_32[1],
394 			   ddr_syndrome->data_63_32[0],
395 			   ddr_syndrome->data_95_64[1],
396 			   ddr_syndrome->data_95_64[0],
397 			   ddr_syndrome->data_127_96[1],
398 			   ddr_syndrome->data_127_96[0],
399 			   ddr_syndrome->parity_bits,
400 			   ddr_syndrome->addr_msb,
401 			   ddr_syndrome->addr_lsb);
402 		break;
403 	case SYS_BUS1:
404 		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    instance %d\n    %s\n    err_type %d\n    address0 0x%x\n    address1 0x%x\n    address2 0x%x\n    address3 0x%x\n    address4 0x%x\n    address5 0x%x\n    address6 0x%x\n    address7 0x%x\n",
405 			   err_class_str[msg->err_type],
406 			   err_type_str[msg->err_type],
407 			   "error from",
408 			   err_src_str[msg->source],
409 			   msg->err_threshold,
410 			   sysbus1_syndrome->instance,
411 			   sysbus1_syndrome->slave ? "Slave" : "Master",
412 			   sysbus1_syndrome->err_type,
413 			   sysbus1_syndrome->addr[0],
414 			   sysbus1_syndrome->addr[1],
415 			   sysbus1_syndrome->addr[2],
416 			   sysbus1_syndrome->addr[3],
417 			   sysbus1_syndrome->addr[4],
418 			   sysbus1_syndrome->addr[5],
419 			   sysbus1_syndrome->addr[6],
420 			   sysbus1_syndrome->addr[7]);
421 		break;
422 	case SYS_BUS2:
423 		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    instance %d\n    valid %d\n    word error %d\n    non-secure %d\n    opc %d\n    error code %d\n    transaction type %d\n    address space %d\n    operation type %d\n    len %d\n    redirect %d\n    path %d\n    ext_id %d\n    lsb2 %d\n    msb2 %d\n    lsb3 %d\n    msb3 %d\n",
424 			   err_class_str[msg->err_type],
425 			   err_type_str[msg->err_type],
426 			   "error from",
427 			   err_src_str[msg->source],
428 			   msg->err_threshold,
429 			   sysbus2_syndrome->instance,
430 			   sysbus2_syndrome->valid,
431 			   sysbus2_syndrome->word_error,
432 			   sysbus2_syndrome->non_secure,
433 			   sysbus2_syndrome->opc,
434 			   sysbus2_syndrome->error_code,
435 			   sysbus2_syndrome->trans_type,
436 			   sysbus2_syndrome->addr_space,
437 			   sysbus2_syndrome->op_type,
438 			   sysbus2_syndrome->len,
439 			   sysbus2_syndrome->redirect,
440 			   sysbus2_syndrome->path,
441 			   sysbus2_syndrome->ext_id,
442 			   sysbus2_syndrome->lsb2,
443 			   sysbus2_syndrome->msb2,
444 			   sysbus2_syndrome->lsb3,
445 			   sysbus2_syndrome->msb3);
446 		break;
447 	case NSP_MEM:
448 		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    NSP ID %d\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n    0x%x\n",
449 			   err_class_str[msg->err_type],
450 			   err_type_str[msg->err_type],
451 			   "error from",
452 			   err_src_str[msg->source],
453 			   msg->err_threshold,
454 			   nsp_syndrome->nsp_id,
455 			   nsp_syndrome->error_address[0],
456 			   nsp_syndrome->error_address[1],
457 			   nsp_syndrome->error_address[2],
458 			   nsp_syndrome->error_address[3],
459 			   nsp_syndrome->error_address[4],
460 			   nsp_syndrome->error_address[5],
461 			   nsp_syndrome->error_address[6],
462 			   nsp_syndrome->error_address[7]);
463 		break;
464 	case TSENS:
465 		if (tsens_syndrome->threshold_type >= NUM_TEMP_LVL) {
466 			pci_warn(qdev->pdev, "Dropping RAS message with invalid temp threshold %d\n",
467 				 tsens_syndrome->threshold_type);
468 			break;
469 		}
470 
471 		if (msg->err_type)
472 			class = "Fatal";
473 		else if (tsens_syndrome->threshold_type)
474 			class = "Critical";
475 		else
476 			class = "Warning";
477 
478 		dev_printk(level, &qdev->pdev->dev, "RAS event.\nClass:%s\nDescription:%s %s %s\nError Threshold for this report %d\nSyndrome:\n    %s threshold\n    %d deg C\n",
479 			   class,
480 			   err_type_str[msg->err_type],
481 			   "error from",
482 			   err_src_str[msg->source],
483 			   msg->err_threshold,
484 			   threshold_type_str[tsens_syndrome->threshold_type],
485 			   tsens_syndrome->temp);
486 		break;
487 	}
488 
489 	/* Uncorrectable errors are fatal */
490 	if (msg->err_type == UE)
491 		mhi_soc_reset(qdev->mhi_cntrl);
492 
493 	switch (msg->err_type) {
494 	case CE:
495 		if (qdev->ce_count != UINT_MAX)
496 			qdev->ce_count++;
497 		break;
498 	case UE:
499 		if (qdev->ue_count != UINT_MAX)
500 			qdev->ue_count++;
501 		break;
502 	case UE_NF:
503 		if (qdev->ue_nf_count != UINT_MAX)
504 			qdev->ue_nf_count++;
505 		break;
506 	default:
507 		/* not possible */
508 		break;
509 	}
510 }
511 
512 static ssize_t ce_count_show(struct device *dev, struct device_attribute *attr, char *buf)
513 {
514 	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
515 
516 	return sysfs_emit(buf, "%d\n", qdev->ce_count);
517 }
518 
519 static ssize_t ue_count_show(struct device *dev, struct device_attribute *attr, char *buf)
520 {
521 	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
522 
523 	return sysfs_emit(buf, "%d\n", qdev->ue_count);
524 }
525 
526 static ssize_t ue_nonfatal_count_show(struct device *dev, struct device_attribute *attr, char *buf)
527 {
528 	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(dev));
529 
530 	return sysfs_emit(buf, "%d\n", qdev->ue_nf_count);
531 }
532 
533 static DEVICE_ATTR_RO(ce_count);
534 static DEVICE_ATTR_RO(ue_count);
535 static DEVICE_ATTR_RO(ue_nonfatal_count);
536 
537 static struct attribute *ras_attrs[] = {
538 	&dev_attr_ce_count.attr,
539 	&dev_attr_ue_count.attr,
540 	&dev_attr_ue_nonfatal_count.attr,
541 	NULL,
542 };
543 
544 static struct attribute_group ras_group = {
545 	.attrs = ras_attrs,
546 };
547 
548 static int qaic_ras_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
549 {
550 	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
551 	struct ras_data *resp;
552 	int ret;
553 
554 	ret = mhi_prepare_for_transfer(mhi_dev);
555 	if (ret)
556 		return ret;
557 
558 	resp = kzalloc_obj(*resp);
559 	if (!resp) {
560 		mhi_unprepare_from_transfer(mhi_dev);
561 		return -ENOMEM;
562 	}
563 
564 	ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp, sizeof(*resp), MHI_EOT);
565 	if (ret) {
566 		kfree(resp);
567 		mhi_unprepare_from_transfer(mhi_dev);
568 		return ret;
569 	}
570 
571 	ret = device_add_group(&qdev->pdev->dev, &ras_group);
572 	if (ret) {
573 		mhi_unprepare_from_transfer(mhi_dev);
574 		pci_dbg(qdev->pdev, "ras add sysfs failed %d\n", ret);
575 		return ret;
576 	}
577 
578 	dev_set_drvdata(&mhi_dev->dev, qdev);
579 	qdev->ras_ch = mhi_dev;
580 
581 	return ret;
582 }
583 
584 static void qaic_ras_mhi_remove(struct mhi_device *mhi_dev)
585 {
586 	struct qaic_device *qdev;
587 
588 	qdev = dev_get_drvdata(&mhi_dev->dev);
589 	qdev->ras_ch = NULL;
590 	device_remove_group(&qdev->pdev->dev, &ras_group);
591 	mhi_unprepare_from_transfer(mhi_dev);
592 }
593 
/*
 * Uplink completion callback. Intentionally empty: this file only queues
 * DMA_FROM_DEVICE buffers, so there is nothing to do here.
 */
static void qaic_ras_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
}
595 
596 static void qaic_ras_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
597 {
598 	struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
599 	struct ras_data *msg = mhi_result->buf_addr;
600 	int ret;
601 
602 	if (mhi_result->transaction_status) {
603 		kfree(msg);
604 		return;
605 	}
606 
607 	ras_msg_to_cpu(msg);
608 	decode_ras_msg(qdev, msg);
609 
610 	ret = mhi_queue_buf(qdev->ras_ch, DMA_FROM_DEVICE, msg, sizeof(*msg), MHI_EOT);
611 	if (ret) {
612 		dev_err(&mhi_dev->dev, "Cannot requeue RAS recv buf %d\n", ret);
613 		kfree(msg);
614 	}
615 }
616 
/* MHI channels this driver binds to; the empty entry terminates the table */
static const struct mhi_device_id qaic_ras_mhi_match_table[] = {
	{ .chan = "QAIC_STATUS", },
	{},
};
621 
/* MHI driver description registered by qaic_ras_register() */
static struct mhi_driver qaic_ras_mhi_driver = {
	.id_table = qaic_ras_mhi_match_table,
	.remove = qaic_ras_mhi_remove,
	.probe = qaic_ras_mhi_probe,
	.ul_xfer_cb = qaic_ras_mhi_ul_xfer_cb,
	.dl_xfer_cb = qaic_ras_mhi_dl_xfer_cb,
	.driver = {
		.name = "qaic_ras",
	},
};
632 
/* Register the RAS MHI driver; returns the result of mhi_driver_register(). */
int qaic_ras_register(void)
{
	return mhi_driver_register(&qaic_ras_mhi_driver);
}
637 
/* Unregister the RAS MHI driver registered by qaic_ras_register(). */
void qaic_ras_unregister(void)
{
	mhi_driver_unregister(&qaic_ras_mhi_driver);
}
642