xref: /linux/drivers/accel/qaic/qaic_ssr.c (revision 84318277d6334c6981ab326d4acc87c6a6ddc9b8)
1 // SPDX-License-Identifier: GPL-2.0-only
2 
3 /* Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */
4 /* Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */
5 
6 #include <asm/byteorder.h>
7 #include <drm/drm_file.h>
8 #include <drm/drm_managed.h>
9 #include <linux/devcoredump.h>
10 #include <linux/device.h>
11 #include <linux/kernel.h>
12 #include <linux/mhi.h>
13 #include <linux/workqueue.h>
14 
15 #include "qaic.h"
16 #include "qaic_ssr.h"
17 
18 #define SSR_RESP_MSG_SZ 32
19 #define SSR_MHI_BUF_SIZE SZ_64K
20 #define SSR_MEM_READ_DATA_SIZE ((u64)SSR_MHI_BUF_SIZE - sizeof(struct ssr_crashdump))
21 #define SSR_MEM_READ_CHUNK_SIZE ((u64)SSR_MEM_READ_DATA_SIZE - sizeof(struct ssr_memory_read_rsp))
22 
23 #define DEBUG_TRANSFER_INFO		BIT(0)
24 #define DEBUG_TRANSFER_INFO_RSP		BIT(1)
25 #define MEMORY_READ			BIT(2)
26 #define MEMORY_READ_RSP			BIT(3)
27 #define DEBUG_TRANSFER_DONE		BIT(4)
28 #define DEBUG_TRANSFER_DONE_RSP		BIT(5)
29 #define SSR_EVENT			BIT(8)
30 #define SSR_EVENT_RSP			BIT(9)
31 
32 #define SSR_EVENT_NACK		BIT(0)
33 #define BEFORE_SHUTDOWN		BIT(1)
34 #define AFTER_SHUTDOWN		BIT(2)
35 #define BEFORE_POWER_UP		BIT(3)
36 #define AFTER_POWER_UP		BIT(4)
37 
/*
 * One entry of the debug region table the device exports during SSR.
 * Entries arrive little-endian and are converted to CPU order in place
 * (see alloc_dump()).
 */
struct debug_info_table {
	/* Save preferences. Default is mandatory */
	u64 save_perf;
	/* Base address of the debug region in device memory */
	u64 mem_base;
	/* Size of debug region in bytes */
	u64 len;
	/* Description */
	char desc[20];
	/* Filename of debug region */
	char filename[20];
};
50 
/* SSR message header as it appears on the wire (little-endian fields) */
struct _ssr_hdr {
	/* SSR command, one of the BIT() command values above */
	__le32 cmd;
	/* Total message length in bytes, including this header */
	__le32 len;
	/* DBC this message relates to */
	__le32 dbc_id;
};
56 
/* CPU-endian counterpart of struct _ssr_hdr, filled via le32_to_cpu() */
struct ssr_hdr {
	u32 cmd;
	u32 len;
	u32 dbc_id;
};
62 
/* DEBUG_TRANSFER_INFO request: device announces its debug table location */
struct ssr_debug_transfer_info {
	struct ssr_hdr hdr;
	u32 resv;
	/* Device-side address of the debug table (LE on wire; converted in place) */
	u64 tbl_addr;
	/* Length of the debug table in bytes (LE on wire; converted in place) */
	u64 tbl_len;
} __packed;
69 
/* Host's reply to DEBUG_TRANSFER_INFO */
struct ssr_debug_transfer_info_rsp {
	struct _ssr_hdr hdr;
	/* 0 = ACK (host ready to download crashdump), 1 = NACK */
	__le32 ret;
} __packed;
74 
/* MEMORY READ request: ask the device for a chunk of its memory */
struct ssr_memory_read {
	struct _ssr_hdr hdr;
	__le32 resv;
	/* Device address to read from */
	__le64 addr;
	/* Number of bytes to read */
	__le64 len;
} __packed;
81 
/* MEMORY READ response: requested memory follows the header */
struct ssr_memory_read_rsp {
	struct _ssr_hdr hdr;
	__le32 resv;
	/* Payload; its length is hdr.len minus the size of this struct */
	u8 data[];
} __packed;
87 
/* DEBUG_TRANSFER_DONE: host tells the device the crashdump download finished */
struct ssr_debug_transfer_done {
	struct _ssr_hdr hdr;
	__le32 resv;
} __packed;
92 
/* Device's reply to DEBUG_TRANSFER_DONE; nonzero ret indicates failure */
struct ssr_debug_transfer_done_rsp {
	struct _ssr_hdr hdr;
	__le32 ret;
} __packed;
97 
/* SSR_EVENT notification from the device (BEFORE_SHUTDOWN, AFTER_POWER_UP, ...) */
struct ssr_event {
	struct ssr_hdr hdr;
	/* Event bit; converted to CPU order in ssr_worker() */
	u32 event;
} __packed;
102 
/* Host's acknowledgement of an SSR_EVENT; echoes the event or SSR_EVENT_NACK */
struct ssr_event_rsp {
	struct _ssr_hdr hdr;
	__le32 event;
} __packed;
107 
/* Response buffer handed to MHI for control messages on the QAIC_SSR channel */
struct ssr_resp {
	/* Work struct to schedule work coming on QAIC_SSR channel */
	struct work_struct work;
	/* Root struct of device, used to access device resources */
	struct qaic_device *qdev;
	/* Buffer used by MHI for transfer requests */
	u8 data[] __aligned(8);
};
116 
/* SSR crashdump book keeping structure */
struct ssr_dump_info {
	/* DBC associated with this SSR crashdump */
	struct dma_bridge_chan *dbc;
	/*
	 * It will be used when we complete the crashdump download and switch
	 * to waiting on SSR events
	 */
	struct ssr_resp *resp;
	/* MEMORY READ request MHI buffer. Reused for every chunk request */
	struct ssr_memory_read *read_buf_req;
	/* TRUE: ->read_buf_req is queued for MHI transaction. FALSE: Otherwise */
	bool read_buf_req_queued;
	/* Address of table in host */
	void *tbl_addr;
	/* Total size of table */
	u64 tbl_len;
	/* Offset of table(->tbl_addr) where the new chunk will be dumped */
	u64 tbl_off;
	/* Address of table in device/target */
	u64 tbl_addr_dev;
	/* Ptr to the entire dump */
	void *dump_addr;
	/* Entire crashdump size */
	u64 dump_sz;
	/* Offset of crashdump(->dump_addr) where the new chunk will be dumped */
	u64 dump_off;
	/* Points to the table entry we are currently downloading */
	struct debug_info_table *tbl_ent;
	/* Offset in the current table entry(->tbl_ent) for next chunk */
	u64 tbl_ent_off;
};
149 
/*
 * Per-device SSR crashdump state. Allocated once as a single
 * SSR_MHI_BUF_SIZE buffer in qaic_ssr_init(); the flexible data[] array
 * occupies the remainder and receives MEMORY READ response chunks.
 */
struct ssr_crashdump {
	/*
	 * Points to a book keeping struct maintained by MHI SSR device while
	 * downloading a SSR crashdump. It is NULL when crashdump downloading
	 * not in progress.
	 */
	struct ssr_dump_info *dump_info;
	/* Work struct to schedule work coming on QAIC_SSR channel */
	struct work_struct work;
	/* Root struct of device, used to access device resources */
	struct qaic_device *qdev;
	/* Buffer used by MHI for transfer requests */
	u8 data[];
};
164 
#define QAIC_SSR_DUMP_V1_MAGIC 0x1234567890abcdef
#define QAIC_SSR_DUMP_V1_VER   1
/* Header placed at the start of the generated crashdump blob */
struct dump_file_meta {
	u64 magic;		/* QAIC_SSR_DUMP_V1_MAGIC */
	u64 version;		/* QAIC_SSR_DUMP_V1_VER */
	u64 size;		/* Total size of the entire dump */
	u64 tbl_len;		/* Length of the table in bytes */
};
173 
174 /*
175  * Layout of crashdump
176  *              +------------------------------------------+
177  *              |         Crashdump Meta structure         |
178  *              | type: struct dump_file_meta              |
179  *              +------------------------------------------+
180  *              |             Crashdump Table              |
181  *              | type: array of struct debug_info_table   |
182  *              |                                          |
183  *              |                                          |
184  *              |                                          |
185  *              +------------------------------------------+
186  *              |                Crashdump                 |
187  *              |                                          |
188  *              |                                          |
189  *              |                                          |
190  *              |                                          |
191  *              |                                          |
192  *              +------------------------------------------+
193  */
194 
195 static void free_ssr_dump_info(struct ssr_crashdump *ssr_crash)
196 {
197 	struct ssr_dump_info *dump_info = ssr_crash->dump_info;
198 
199 	ssr_crash->dump_info = NULL;
200 	if (!dump_info)
201 		return;
202 	if (!dump_info->read_buf_req_queued)
203 		kfree(dump_info->read_buf_req);
204 	vfree(dump_info->tbl_addr);
205 	vfree(dump_info->dump_addr);
206 	kfree(dump_info);
207 }
208 
209 void qaic_clean_up_ssr(struct qaic_device *qdev)
210 {
211 	struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
212 
213 	if (!ssr_crash)
214 		return;
215 
216 	qaic_dbc_exit_ssr(qdev);
217 	free_ssr_dump_info(ssr_crash);
218 }
219 
220 static int alloc_dump(struct ssr_dump_info *dump_info)
221 {
222 	struct debug_info_table *tbl_ent = dump_info->tbl_addr;
223 	struct dump_file_meta *dump_meta;
224 	u64 tbl_sz_lp = 0;
225 	u64 dump_size = 0;
226 
227 	while (tbl_sz_lp < dump_info->tbl_len) {
228 		le64_to_cpus(&tbl_ent->save_perf);
229 		le64_to_cpus(&tbl_ent->mem_base);
230 		le64_to_cpus(&tbl_ent->len);
231 
232 		if (tbl_ent->len == 0)
233 			return -EINVAL;
234 
235 		dump_size += tbl_ent->len;
236 		tbl_ent++;
237 		tbl_sz_lp += sizeof(*tbl_ent);
238 	}
239 
240 	dump_info->dump_sz = dump_size + dump_info->tbl_len + sizeof(*dump_meta);
241 	dump_info->dump_addr = vzalloc(dump_info->dump_sz);
242 	if (!dump_info->dump_addr)
243 		return -ENOMEM;
244 
245 	/* Copy crashdump meta and table */
246 	dump_meta = dump_info->dump_addr;
247 	dump_meta->magic = QAIC_SSR_DUMP_V1_MAGIC;
248 	dump_meta->version = QAIC_SSR_DUMP_V1_VER;
249 	dump_meta->size = dump_info->dump_sz;
250 	dump_meta->tbl_len = dump_info->tbl_len;
251 	memcpy(dump_info->dump_addr + sizeof(*dump_meta), dump_info->tbl_addr, dump_info->tbl_len);
252 	/* Offset by crashdump meta and table (copied above) */
253 	dump_info->dump_off = dump_info->tbl_len + sizeof(*dump_meta);
254 
255 	return 0;
256 }
257 
258 static int send_xfer_done(struct qaic_device *qdev, void *resp, u32 dbc_id)
259 {
260 	struct ssr_debug_transfer_done *xfer_done;
261 	int ret;
262 
263 	xfer_done = kmalloc(sizeof(*xfer_done), GFP_KERNEL);
264 	if (!xfer_done) {
265 		ret = -ENOMEM;
266 		goto out;
267 	}
268 
269 	ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp, SSR_RESP_MSG_SZ, MHI_EOT);
270 	if (ret)
271 		goto free_xfer_done;
272 
273 	xfer_done->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_DONE);
274 	xfer_done->hdr.len = cpu_to_le32(sizeof(*xfer_done));
275 	xfer_done->hdr.dbc_id = cpu_to_le32(dbc_id);
276 
277 	ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, xfer_done, sizeof(*xfer_done), MHI_EOT);
278 	if (ret)
279 		goto free_xfer_done;
280 
281 	return 0;
282 
283 free_xfer_done:
284 	kfree(xfer_done);
285 out:
286 	return ret;
287 }
288 
289 static int mem_read_req(struct qaic_device *qdev, u64 dest_addr, u64 dest_len)
290 {
291 	struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
292 	struct ssr_memory_read *read_buf_req;
293 	struct ssr_dump_info *dump_info;
294 	int ret;
295 
296 	dump_info = ssr_crash->dump_info;
297 	ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, ssr_crash->data, SSR_MEM_READ_DATA_SIZE,
298 			    MHI_EOT);
299 	if (ret)
300 		goto out;
301 
302 	read_buf_req = dump_info->read_buf_req;
303 	read_buf_req->hdr.cmd = cpu_to_le32(MEMORY_READ);
304 	read_buf_req->hdr.len = cpu_to_le32(sizeof(*read_buf_req));
305 	read_buf_req->hdr.dbc_id = cpu_to_le32(qdev->ssr_dbc);
306 	read_buf_req->addr = cpu_to_le64(dest_addr);
307 	read_buf_req->len = cpu_to_le64(dest_len);
308 
309 	ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, read_buf_req, sizeof(*read_buf_req),
310 			    MHI_EOT);
311 	if (!ret)
312 		dump_info->read_buf_req_queued = true;
313 
314 out:
315 	return ret;
316 }
317 
318 static int ssr_copy_table(struct ssr_dump_info *dump_info, void *data, u64 len)
319 {
320 	if (len > dump_info->tbl_len - dump_info->tbl_off)
321 		return -EINVAL;
322 
323 	memcpy(dump_info->tbl_addr + dump_info->tbl_off, data, len);
324 	dump_info->tbl_off += len;
325 
326 	/* Entire table has been downloaded, alloc dump memory */
327 	if (dump_info->tbl_off == dump_info->tbl_len) {
328 		dump_info->tbl_ent = dump_info->tbl_addr;
329 		return alloc_dump(dump_info);
330 	}
331 
332 	return 0;
333 }
334 
335 static int ssr_copy_dump(struct ssr_dump_info *dump_info, void *data, u64 len)
336 {
337 	struct debug_info_table *tbl_ent;
338 
339 	tbl_ent = dump_info->tbl_ent;
340 
341 	if (len > tbl_ent->len - dump_info->tbl_ent_off)
342 		return -EINVAL;
343 
344 	memcpy(dump_info->dump_addr + dump_info->dump_off, data, len);
345 	dump_info->dump_off += len;
346 	dump_info->tbl_ent_off += len;
347 
348 	/*
349 	 * Current segment (a entry in table) of the crashdump is complete,
350 	 * move to next one
351 	 */
352 	if (tbl_ent->len == dump_info->tbl_ent_off) {
353 		dump_info->tbl_ent++;
354 		dump_info->tbl_ent_off = 0;
355 	}
356 
357 	return 0;
358 }
359 
360 static void ssr_dump_worker(struct work_struct *work)
361 {
362 	struct ssr_crashdump *ssr_crash = container_of(work, struct ssr_crashdump, work);
363 	struct qaic_device *qdev = ssr_crash->qdev;
364 	struct ssr_memory_read_rsp *mem_rd_resp;
365 	struct debug_info_table *tbl_ent;
366 	struct ssr_dump_info *dump_info;
367 	u64 dest_addr, dest_len;
368 	struct _ssr_hdr *_hdr;
369 	struct ssr_hdr hdr;
370 	u64 data_len;
371 	int ret;
372 
373 	mem_rd_resp = (struct ssr_memory_read_rsp *)ssr_crash->data;
374 	_hdr = &mem_rd_resp->hdr;
375 	hdr.cmd = le32_to_cpu(_hdr->cmd);
376 	hdr.len = le32_to_cpu(_hdr->len);
377 	hdr.dbc_id = le32_to_cpu(_hdr->dbc_id);
378 
379 	if (hdr.dbc_id != qdev->ssr_dbc)
380 		goto reset_device;
381 
382 	dump_info = ssr_crash->dump_info;
383 	if (!dump_info)
384 		goto reset_device;
385 
386 	if (hdr.cmd != MEMORY_READ_RSP)
387 		goto free_dump_info;
388 
389 	if (hdr.len > SSR_MEM_READ_DATA_SIZE)
390 		goto free_dump_info;
391 
392 	data_len = hdr.len - sizeof(*mem_rd_resp);
393 
394 	if (dump_info->tbl_off < dump_info->tbl_len) /* Chunk belongs to table */
395 		ret = ssr_copy_table(dump_info, mem_rd_resp->data, data_len);
396 	else /* Chunk belongs to crashdump */
397 		ret = ssr_copy_dump(dump_info, mem_rd_resp->data, data_len);
398 
399 	if (ret)
400 		goto free_dump_info;
401 
402 	if (dump_info->tbl_off < dump_info->tbl_len) {
403 		/* Continue downloading table */
404 		dest_addr = dump_info->tbl_addr_dev + dump_info->tbl_off;
405 		dest_len = min(SSR_MEM_READ_CHUNK_SIZE, dump_info->tbl_len - dump_info->tbl_off);
406 		ret = mem_read_req(qdev, dest_addr, dest_len);
407 	} else if (dump_info->dump_off < dump_info->dump_sz) {
408 		/* Continue downloading crashdump */
409 		tbl_ent = dump_info->tbl_ent;
410 		dest_addr = tbl_ent->mem_base + dump_info->tbl_ent_off;
411 		dest_len = min(SSR_MEM_READ_CHUNK_SIZE, tbl_ent->len - dump_info->tbl_ent_off);
412 		ret = mem_read_req(qdev, dest_addr, dest_len);
413 	} else {
414 		/* Crashdump download complete */
415 		ret = send_xfer_done(qdev, dump_info->resp->data, hdr.dbc_id);
416 	}
417 
418 	/* Most likely a MHI xfer has failed */
419 	if (ret)
420 		goto free_dump_info;
421 
422 	return;
423 
424 free_dump_info:
425 	/* Free the allocated memory */
426 	free_ssr_dump_info(ssr_crash);
427 reset_device:
428 	/*
429 	 * After subsystem crashes in device crashdump collection begins but
430 	 * something went wrong while collecting crashdump, now instead of
431 	 * handling this error we just reset the device as the best effort has
432 	 * been made
433 	 */
434 	mhi_soc_reset(qdev->mhi_cntrl);
435 }
436 
437 static struct ssr_dump_info *alloc_dump_info(struct qaic_device *qdev,
438 					     struct ssr_debug_transfer_info *debug_info)
439 {
440 	struct ssr_dump_info *dump_info;
441 	int ret;
442 
443 	le64_to_cpus(&debug_info->tbl_len);
444 	le64_to_cpus(&debug_info->tbl_addr);
445 
446 	if (debug_info->tbl_len == 0 ||
447 	    debug_info->tbl_len % sizeof(struct debug_info_table) != 0) {
448 		ret = -EINVAL;
449 		goto out;
450 	}
451 
452 	/* Allocate SSR crashdump book keeping structure */
453 	dump_info = kzalloc(sizeof(*dump_info), GFP_KERNEL);
454 	if (!dump_info) {
455 		ret = -ENOMEM;
456 		goto out;
457 	}
458 
459 	/* Buffer used to send MEMORY READ request to device via MHI */
460 	dump_info->read_buf_req = kzalloc(sizeof(*dump_info->read_buf_req), GFP_KERNEL);
461 	if (!dump_info->read_buf_req) {
462 		ret = -ENOMEM;
463 		goto free_dump_info;
464 	}
465 
466 	/* Crashdump meta table buffer */
467 	dump_info->tbl_addr = vzalloc(debug_info->tbl_len);
468 	if (!dump_info->tbl_addr) {
469 		ret = -ENOMEM;
470 		goto free_read_buf_req;
471 	}
472 
473 	dump_info->tbl_addr_dev = debug_info->tbl_addr;
474 	dump_info->tbl_len = debug_info->tbl_len;
475 
476 	return dump_info;
477 
478 free_read_buf_req:
479 	kfree(dump_info->read_buf_req);
480 free_dump_info:
481 	kfree(dump_info);
482 out:
483 	return ERR_PTR(ret);
484 }
485 
486 static int dbg_xfer_info_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
487 			     struct ssr_debug_transfer_info *debug_info)
488 {
489 	struct ssr_debug_transfer_info_rsp *debug_rsp;
490 	struct ssr_crashdump *ssr_crash = NULL;
491 	int ret = 0, ret2;
492 
493 	debug_rsp = kmalloc(sizeof(*debug_rsp), GFP_KERNEL);
494 	if (!debug_rsp)
495 		return -ENOMEM;
496 
497 	if (!qdev->ssr_mhi_buf) {
498 		ret = -ENOMEM;
499 		goto send_rsp;
500 	}
501 
502 	if (dbc->state != DBC_STATE_BEFORE_POWER_UP) {
503 		ret = -EINVAL;
504 		goto send_rsp;
505 	}
506 
507 	ssr_crash = qdev->ssr_mhi_buf;
508 	ssr_crash->dump_info = alloc_dump_info(qdev, debug_info);
509 	if (IS_ERR(ssr_crash->dump_info)) {
510 		ret = PTR_ERR(ssr_crash->dump_info);
511 		ssr_crash->dump_info = NULL;
512 	}
513 
514 send_rsp:
515 	debug_rsp->hdr.cmd = cpu_to_le32(DEBUG_TRANSFER_INFO_RSP);
516 	debug_rsp->hdr.len = cpu_to_le32(sizeof(*debug_rsp));
517 	debug_rsp->hdr.dbc_id = cpu_to_le32(dbc->id);
518 	/*
519 	 * 0 = Return an ACK confirming the host is ready to download crashdump
520 	 * 1 = Return an NACK confirming the host is not ready to download crashdump
521 	 */
522 	debug_rsp->ret = cpu_to_le32(ret ? 1 : 0);
523 
524 	ret2 = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, debug_rsp, sizeof(*debug_rsp), MHI_EOT);
525 	if (ret2) {
526 		free_ssr_dump_info(ssr_crash);
527 		kfree(debug_rsp);
528 		return ret2;
529 	}
530 
531 	return ret;
532 }
533 
534 static void dbg_xfer_done_rsp(struct qaic_device *qdev, struct dma_bridge_chan *dbc,
535 			      struct ssr_debug_transfer_done_rsp *xfer_rsp)
536 {
537 	struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
538 	u32 status = le32_to_cpu(xfer_rsp->ret);
539 	struct device *dev = &qdev->pdev->dev;
540 	struct ssr_dump_info *dump_info;
541 
542 	dump_info = ssr_crash->dump_info;
543 	if (!dump_info)
544 		return;
545 
546 	if (status) {
547 		free_ssr_dump_info(ssr_crash);
548 		return;
549 	}
550 
551 	dev_coredumpv(dev, dump_info->dump_addr, dump_info->dump_sz, GFP_KERNEL);
552 	/* dev_coredumpv will free dump_info->dump_addr */
553 	dump_info->dump_addr = NULL;
554 	free_ssr_dump_info(ssr_crash);
555 }
556 
/*
 * ssr_worker() - Process one SSR control message received on QAIC_SSR.
 * @work: work_struct embedded in the struct ssr_resp holding the message
 *
 * Decodes the header, dispatches on the command, and finally requeues the
 * response buffer with MHI so the next message can be received. The one
 * exception is a successfully started crashdump download: the buffer is
 * held back and requeued later by send_xfer_done().
 */
static void ssr_worker(struct work_struct *work)
{
	struct ssr_resp *resp = container_of(work, struct ssr_resp, work);
	struct ssr_hdr *hdr = (struct ssr_hdr *)resp->data;
	struct ssr_dump_info *dump_info = NULL;
	struct qaic_device *qdev = resp->qdev;
	struct ssr_crashdump *ssr_crash;
	struct ssr_event_rsp *event_rsp;
	struct dma_bridge_chan *dbc;
	struct ssr_event *event;
	u32 ssr_event_ack;
	int ret;

	/* Header arrives little-endian; convert in place */
	le32_to_cpus(&hdr->cmd);
	le32_to_cpus(&hdr->len);
	le32_to_cpus(&hdr->dbc_id);

	/* Drop malformed or out-of-range messages */
	if (hdr->len > SSR_RESP_MSG_SZ)
		goto out;

	if (hdr->dbc_id >= qdev->num_dbc)
		goto out;

	dbc = &qdev->dbc[hdr->dbc_id];

	switch (hdr->cmd) {
	case DEBUG_TRANSFER_INFO:
		/* ACK/NACK the crashdump offer and set up bookkeeping */
		ret = dbg_xfer_info_rsp(qdev, dbc, (struct ssr_debug_transfer_info *)resp->data);
		if (ret)
			break;

		/* ret == 0 implies ssr_mhi_buf and dump_info were populated */
		ssr_crash = qdev->ssr_mhi_buf;
		dump_info = ssr_crash->dump_info;
		dump_info->dbc = dbc;
		dump_info->resp = resp;

		/* Start by downloading debug table */
		ret = mem_read_req(qdev, dump_info->tbl_addr_dev,
				   min(dump_info->tbl_len, SSR_MEM_READ_CHUNK_SIZE));
		if (ret) {
			free_ssr_dump_info(ssr_crash);
			break;
		}

		/*
		 * Till now everything went fine, which means that we will be
		 * collecting crashdump chunk by chunk. Do not queue a response
		 * buffer for SSR cmds till the crashdump is complete.
		 */
		return;
	case SSR_EVENT:
		/* Device state-transition notification; mirror it into DBC state */
		event = (struct ssr_event *)hdr;
		le32_to_cpus(&event->event);
		ssr_event_ack = event->event;
		ssr_crash = qdev->ssr_mhi_buf;

		switch (event->event) {
		case BEFORE_SHUTDOWN:
			set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_SHUTDOWN);
			qaic_dbc_enter_ssr(qdev, hdr->dbc_id);
			break;
		case AFTER_SHUTDOWN:
			set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_SHUTDOWN);
			break;
		case BEFORE_POWER_UP:
			set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_BEFORE_POWER_UP);
			break;
		case AFTER_POWER_UP:
			/*
			 * If dump info is a non NULL value it means that we
			 * have received this SSR event while downloading a
			 * crashdump for this DBC is still in progress. NACK
			 * the SSR event
			 */
			if (ssr_crash && ssr_crash->dump_info) {
				free_ssr_dump_info(ssr_crash);
				ssr_event_ack = SSR_EVENT_NACK;
				break;
			}

			set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_AFTER_POWER_UP);
			break;
		default:
			break;
		}

		event_rsp = kmalloc(sizeof(*event_rsp), GFP_KERNEL);
		if (!event_rsp)
			break;

		/* Echo the event back (or SSR_EVENT_NACK) to acknowledge it */
		event_rsp->hdr.cmd = cpu_to_le32(SSR_EVENT_RSP);
		event_rsp->hdr.len = cpu_to_le32(sizeof(*event_rsp));
		event_rsp->hdr.dbc_id = cpu_to_le32(hdr->dbc_id);
		event_rsp->event = cpu_to_le32(ssr_event_ack);

		ret = mhi_queue_buf(qdev->ssr_ch, DMA_TO_DEVICE, event_rsp, sizeof(*event_rsp),
				    MHI_EOT);
		if (ret)
			kfree(event_rsp);

		/* Acked AFTER_POWER_UP completes the SSR cycle; leave SSR mode */
		if (event->event == AFTER_POWER_UP && ssr_event_ack != SSR_EVENT_NACK) {
			qaic_dbc_exit_ssr(qdev);
			set_dbc_state(qdev, hdr->dbc_id, DBC_STATE_IDLE);
		}

		break;
	case DEBUG_TRANSFER_DONE_RSP:
		dbg_xfer_done_rsp(qdev, dbc, (struct ssr_debug_transfer_done_rsp *)hdr);
		break;
	default:
		break;
	}

out:
	/* Re-arm the channel; on failure release the buffer to avoid a leak */
	ret = mhi_queue_buf(qdev->ssr_ch, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
	if (ret)
		kfree(resp);
}
675 
676 static int qaic_ssr_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
677 {
678 	struct qaic_device *qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
679 	struct ssr_resp *resp;
680 	int ret;
681 
682 	ret = mhi_prepare_for_transfer(mhi_dev);
683 	if (ret)
684 		return ret;
685 
686 	resp = kzalloc(sizeof(*resp) + SSR_RESP_MSG_SZ, GFP_KERNEL);
687 	if (!resp) {
688 		mhi_unprepare_from_transfer(mhi_dev);
689 		return -ENOMEM;
690 	}
691 
692 	resp->qdev = qdev;
693 	INIT_WORK(&resp->work, ssr_worker);
694 
695 	ret = mhi_queue_buf(mhi_dev, DMA_FROM_DEVICE, resp->data, SSR_RESP_MSG_SZ, MHI_EOT);
696 	if (ret) {
697 		kfree(resp);
698 		mhi_unprepare_from_transfer(mhi_dev);
699 		return ret;
700 	}
701 
702 	dev_set_drvdata(&mhi_dev->dev, qdev);
703 	qdev->ssr_ch = mhi_dev;
704 
705 	return 0;
706 }
707 
708 static void qaic_ssr_mhi_remove(struct mhi_device *mhi_dev)
709 {
710 	struct qaic_device *qdev;
711 
712 	qdev = dev_get_drvdata(&mhi_dev->dev);
713 	mhi_unprepare_from_transfer(qdev->ssr_ch);
714 	qdev->ssr_ch = NULL;
715 }
716 
/*
 * Upload (host-to-device) completion callback for the QAIC_SSR channel.
 * Frees the just-transmitted request buffer, except for the long-lived
 * MEMORY READ request buffer which is reused across chunks.
 */
static void qaic_ssr_mhi_ul_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
	struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
	struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
	struct _ssr_hdr *hdr = mhi_result->buf_addr;
	struct ssr_dump_info *dump_info;

	/* On a failed transfer just release the buffer */
	if (mhi_result->transaction_status) {
		kfree(mhi_result->buf_addr);
		return;
	}

	/*
	 * MEMORY READ is used to download crashdump. And crashdump is
	 * downloaded chunk by chunk in a series of MEMORY READ SSR commands.
	 * Hence to avoid too many kmalloc() and kfree() of the same MEMORY READ
	 * request buffer, we allocate only one such buffer and free it only
	 * once.
	 */
	if (le32_to_cpu(hdr->cmd) == MEMORY_READ) {
		/*
		 * NOTE(review): ssr_crash is assumed non-NULL here because a
		 * MEMORY READ can only have been sent via mem_read_req(),
		 * which requires qdev->ssr_mhi_buf — confirm no other path.
		 */
		dump_info = ssr_crash->dump_info;
		if (dump_info) {
			/* Mark the reusable request buffer free for the next chunk */
			dump_info->read_buf_req_queued = false;
			return;
		}
	}

	kfree(mhi_result->buf_addr);
}
746 
/*
 * Download (device-to-host) completion callback for the QAIC_SSR channel.
 * Distinguishes by buffer address between the crashdump chunk buffer
 * (ssr_crash->data) and a regular response buffer (struct ssr_resp), then
 * queues the matching worker on the SSR workqueue.
 */
static void qaic_ssr_mhi_dl_xfer_cb(struct mhi_device *mhi_dev, struct mhi_result *mhi_result)
{
	/* container_of() is only valid when this is NOT the crashdump buffer */
	struct ssr_resp *resp = container_of(mhi_result->buf_addr, struct ssr_resp, data);
	struct qaic_device *qdev = dev_get_drvdata(&mhi_dev->dev);
	struct ssr_crashdump *ssr_crash = qdev->ssr_mhi_buf;
	bool memory_read_rsp = false;

	/* The crashdump buffer is identified by address, not by content */
	if (ssr_crash && ssr_crash->data == mhi_result->buf_addr)
		memory_read_rsp = true;

	if (mhi_result->transaction_status) {
		/* Do not free SSR crashdump buffer as it allocated via managed APIs */
		if (!memory_read_rsp)
			kfree(resp);
		return;
	}

	if (memory_read_rsp)
		queue_work(qdev->ssr_wq, &ssr_crash->work);
	else
		queue_work(qdev->ssr_wq, &resp->work);
}
769 
/* Bind to the "QAIC_SSR" MHI channel exposed by the device */
static const struct mhi_device_id qaic_ssr_mhi_match_table[] = {
	{ .chan = "QAIC_SSR", },
	{},
};
774 
/* MHI client driver glue for the SSR channel */
static struct mhi_driver qaic_ssr_mhi_driver = {
	.id_table = qaic_ssr_mhi_match_table,
	.remove = qaic_ssr_mhi_remove,
	.probe = qaic_ssr_mhi_probe,
	.ul_xfer_cb = qaic_ssr_mhi_ul_xfer_cb,
	.dl_xfer_cb = qaic_ssr_mhi_dl_xfer_cb,
	.driver = {
		.name = "qaic_ssr",
	},
};
785 
786 int qaic_ssr_init(struct qaic_device *qdev, struct drm_device *drm)
787 {
788 	struct ssr_crashdump *ssr_crash;
789 
790 	qdev->ssr_dbc = QAIC_SSR_DBC_SENTINEL;
791 
792 	/*
793 	 * Device requests only one SSR at a time. So allocating only one
794 	 * buffer to download crashdump is good enough.
795 	 */
796 	ssr_crash = drmm_kzalloc(drm, SSR_MHI_BUF_SIZE, GFP_KERNEL);
797 	if (!ssr_crash)
798 		return -ENOMEM;
799 
800 	ssr_crash->qdev = qdev;
801 	INIT_WORK(&ssr_crash->work, ssr_dump_worker);
802 	qdev->ssr_mhi_buf = ssr_crash;
803 
804 	return 0;
805 }
806 
/* Register the SSR MHI client driver with the MHI core */
int qaic_ssr_register(void)
{
	return mhi_driver_register(&qaic_ssr_mhi_driver);
}
811 
/* Unregister the SSR MHI client driver */
void qaic_ssr_unregister(void)
{
	mhi_driver_unregister(&qaic_ssr_mhi_driver);
}
816