xref: /linux/drivers/accel/habanalabs/common/debugfs.c (revision c94cd9508b1335b949fd13ebd269313c65492df0)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright 2016-2021 HabanaLabs, Ltd.
5  * All Rights Reserved.
6  */
7 
8 #include "habanalabs.h"
9 #include "../include/hw_ip/mmu/mmu_general.h"
10 
11 #include <linux/pci.h>
12 #include <linux/uaccess.h>
13 #include <linux/vmalloc.h>
14 #include <linux/iommu.h>
15 
16 #define MMU_ADDR_BUF_SIZE	40
17 #define MMU_ASID_BUF_SIZE	10
18 #define MMU_KBUF_SIZE		(MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
19 #define I2C_MAX_TRANSACTION_LEN	8
20 
21 static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
22 				u8 i2c_reg, u8 i2c_len, u64 *val)
23 {
24 	struct cpucp_packet pkt;
25 	int rc;
26 
27 	if (!hl_device_operational(hdev, NULL))
28 		return -EBUSY;
29 
30 	if (i2c_len > I2C_MAX_TRANSACTION_LEN) {
31 		dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",
32 				i2c_len, I2C_MAX_TRANSACTION_LEN);
33 		return -EINVAL;
34 	}
35 
36 	memset(&pkt, 0, sizeof(pkt));
37 
38 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<
39 				CPUCP_PKT_CTL_OPCODE_SHIFT);
40 	pkt.i2c_bus = i2c_bus;
41 	pkt.i2c_addr = i2c_addr;
42 	pkt.i2c_reg = i2c_reg;
43 	pkt.i2c_len = i2c_len;
44 
45 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, val);
46 	if (rc && rc != -EAGAIN)
47 		dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
48 
49 	return rc;
50 }
51 
52 static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
53 				u8 i2c_reg, u8 i2c_len, u64 val)
54 {
55 	struct cpucp_packet pkt;
56 	int rc;
57 
58 	if (!hl_device_operational(hdev, NULL))
59 		return -EBUSY;
60 
61 	if (i2c_len > I2C_MAX_TRANSACTION_LEN) {
62 		dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",
63 				i2c_len, I2C_MAX_TRANSACTION_LEN);
64 		return -EINVAL;
65 	}
66 
67 	memset(&pkt, 0, sizeof(pkt));
68 
69 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<
70 				CPUCP_PKT_CTL_OPCODE_SHIFT);
71 	pkt.i2c_bus = i2c_bus;
72 	pkt.i2c_addr = i2c_addr;
73 	pkt.i2c_reg = i2c_reg;
74 	pkt.i2c_len = i2c_len;
75 	pkt.value = cpu_to_le64(val);
76 
77 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
78 	if (rc && rc != -EAGAIN)
79 		dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
80 
81 	return rc;
82 }
83 
84 static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
85 {
86 	struct cpucp_packet pkt;
87 	int rc;
88 
89 	if (!hl_device_operational(hdev, NULL))
90 		return;
91 
92 	memset(&pkt, 0, sizeof(pkt));
93 
94 	pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET <<
95 				CPUCP_PKT_CTL_OPCODE_SHIFT);
96 	pkt.led_index = cpu_to_le32(led);
97 	pkt.value = cpu_to_le64(state);
98 
99 	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
100 	if (rc && rc != -EAGAIN)
101 		dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
102 }
103 
104 static int command_buffers_show(struct seq_file *s, void *data)
105 {
106 	struct hl_debugfs_entry *entry = s->private;
107 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
108 	struct hl_cb *cb;
109 	bool first = true;
110 
111 	spin_lock(&dev_entry->cb_spinlock);
112 
113 	list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
114 		if (first) {
115 			first = false;
116 			seq_puts(s, "\n");
117 			seq_puts(s, " CB ID   CTX ID   CB size    CB RefCnt    mmap?   CS counter\n");
118 			seq_puts(s, "---------------------------------------------------------------\n");
119 		}
120 		seq_printf(s,
121 			"   %03llu        %d    0x%08x      %d          %d          %d\n",
122 			cb->buf->handle, cb->ctx->asid, cb->size,
123 			kref_read(&cb->buf->refcount),
124 			atomic_read(&cb->buf->mmap), atomic_read(&cb->cs_cnt));
125 	}
126 
127 	spin_unlock(&dev_entry->cb_spinlock);
128 
129 	if (!first)
130 		seq_puts(s, "\n");
131 
132 	return 0;
133 }
134 
135 static int command_submission_show(struct seq_file *s, void *data)
136 {
137 	struct hl_debugfs_entry *entry = s->private;
138 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
139 	struct hl_cs *cs;
140 	bool first = true;
141 
142 	spin_lock(&dev_entry->cs_spinlock);
143 
144 	list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
145 		if (first) {
146 			first = false;
147 			seq_puts(s, "\n");
148 			seq_puts(s, " CS ID   CS TYPE   CTX ASID   CS RefCnt   Submitted    Completed\n");
149 			seq_puts(s, "----------------------------------------------------------------\n");
150 		}
151 		seq_printf(s,
152 			"   %llu        %d          %d          %d           %d            %d\n",
153 			cs->sequence, cs->type, cs->ctx->asid,
154 			kref_read(&cs->refcount),
155 			cs->submitted, cs->completed);
156 	}
157 
158 	spin_unlock(&dev_entry->cs_spinlock);
159 
160 	if (!first)
161 		seq_puts(s, "\n");
162 
163 	return 0;
164 }
165 
166 static int command_submission_jobs_show(struct seq_file *s, void *data)
167 {
168 	struct hl_debugfs_entry *entry = s->private;
169 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
170 	struct hl_cs_job *job;
171 	bool first = true;
172 
173 	spin_lock(&dev_entry->cs_job_spinlock);
174 
175 	list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
176 		if (first) {
177 			first = false;
178 			seq_puts(s, "\n");
179 			seq_puts(s, " JOB ID   CS ID    CS TYPE    CTX ASID   JOB RefCnt   H/W Queue\n");
180 			seq_puts(s, "---------------------------------------------------------------\n");
181 		}
182 		if (job->cs)
183 			seq_printf(s,
184 				"   %02d      %llu        %d        %d          %d           %d\n",
185 				job->id, job->cs->sequence, job->cs->type,
186 				job->cs->ctx->asid, kref_read(&job->refcount),
187 				job->hw_queue_id);
188 		else
189 			seq_printf(s,
190 				"   %02d      0        0        %d          %d           %d\n",
191 				job->id, HL_KERNEL_ASID_ID,
192 				kref_read(&job->refcount), job->hw_queue_id);
193 	}
194 
195 	spin_unlock(&dev_entry->cs_job_spinlock);
196 
197 	if (!first)
198 		seq_puts(s, "\n");
199 
200 	return 0;
201 }
202 
203 static int userptr_show(struct seq_file *s, void *data)
204 {
205 	struct hl_debugfs_entry *entry = s->private;
206 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
207 	struct hl_userptr *userptr;
208 	char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
209 				"DMA_FROM_DEVICE", "DMA_NONE"};
210 	bool first = true;
211 
212 	spin_lock(&dev_entry->userptr_spinlock);
213 
214 	list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
215 		if (first) {
216 			first = false;
217 			seq_puts(s, "\n");
218 			seq_puts(s, " pid      user virtual address     size             dma dir\n");
219 			seq_puts(s, "----------------------------------------------------------\n");
220 		}
221 		seq_printf(s, " %-7d  0x%-14llx      %-10llu    %-30s\n",
222 				userptr->pid, userptr->addr, userptr->size,
223 				dma_dir[userptr->dir]);
224 	}
225 
226 	spin_unlock(&dev_entry->userptr_spinlock);
227 
228 	if (!first)
229 		seq_puts(s, "\n");
230 
231 	return 0;
232 }
233 
234 static int vm_show(struct seq_file *s, void *data)
235 {
236 	struct hl_debugfs_entry *entry = s->private;
237 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
238 	struct hl_vm_hw_block_list_node *lnode;
239 	struct hl_ctx *ctx;
240 	struct hl_vm *vm;
241 	struct hl_vm_hash_node *hnode;
242 	struct hl_userptr *userptr;
243 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
244 	struct hl_va_range *va_range;
245 	struct hl_vm_va_block *va_block;
246 	enum vm_type *vm_type;
247 	bool once = true;
248 	u64 j;
249 	int i;
250 
251 	mutex_lock(&dev_entry->ctx_mem_hash_mutex);
252 
253 	list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
254 		once = false;
255 		seq_puts(s, "\n\n----------------------------------------------------");
256 		seq_puts(s, "\n----------------------------------------------------\n\n");
257 		seq_printf(s, "ctx asid: %u\n", ctx->asid);
258 
259 		seq_puts(s, "\nmappings:\n\n");
260 		seq_puts(s, "    virtual address        size          handle\n");
261 		seq_puts(s, "----------------------------------------------------\n");
262 		mutex_lock(&ctx->mem_hash_lock);
263 		hash_for_each(ctx->mem_hash, i, hnode, node) {
264 			vm_type = hnode->ptr;
265 
266 			if (*vm_type == VM_TYPE_USERPTR) {
267 				userptr = hnode->ptr;
268 				seq_printf(s,
269 					"    0x%-14llx      %-10llu\n",
270 					hnode->vaddr, userptr->size);
271 			} else {
272 				phys_pg_pack = hnode->ptr;
273 				seq_printf(s,
274 					"    0x%-14llx      %-10llu       %-4u\n",
275 					hnode->vaddr, phys_pg_pack->total_size,
276 					phys_pg_pack->handle);
277 			}
278 		}
279 		mutex_unlock(&ctx->mem_hash_lock);
280 
281 		if (ctx->asid != HL_KERNEL_ASID_ID &&
282 		    !list_empty(&ctx->hw_block_mem_list)) {
283 			seq_puts(s, "\nhw_block mappings:\n\n");
284 			seq_puts(s,
285 				"    virtual address    block size    mapped size    HW block id\n");
286 			seq_puts(s,
287 				"---------------------------------------------------------------\n");
288 			mutex_lock(&ctx->hw_block_list_lock);
289 			list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {
290 				seq_printf(s,
291 					"    0x%-14lx   %-6u        %-6u             %-9u\n",
292 					lnode->vaddr, lnode->block_size, lnode->mapped_size,
293 					lnode->id);
294 			}
295 			mutex_unlock(&ctx->hw_block_list_lock);
296 		}
297 
298 		vm = &ctx->hdev->vm;
299 		spin_lock(&vm->idr_lock);
300 
301 		if (!idr_is_empty(&vm->phys_pg_pack_handles))
302 			seq_puts(s, "\n\nallocations:\n");
303 
304 		idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {
305 			if (phys_pg_pack->asid != ctx->asid)
306 				continue;
307 
308 			seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
309 			seq_printf(s, "page size: %u\n\n",
310 						phys_pg_pack->page_size);
311 			seq_puts(s, "   physical address\n");
312 			seq_puts(s, "---------------------\n");
313 			for (j = 0 ; j < phys_pg_pack->npages ; j++) {
314 				seq_printf(s, "    0x%-14llx\n",
315 						phys_pg_pack->pages[j]);
316 			}
317 		}
318 		spin_unlock(&vm->idr_lock);
319 
320 	}
321 
322 	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
323 
324 	ctx = hl_get_compute_ctx(dev_entry->hdev);
325 	if (ctx) {
326 		seq_puts(s, "\nVA ranges:\n\n");
327 		for (i = HL_VA_RANGE_TYPE_HOST ; i < HL_VA_RANGE_TYPE_MAX ; ++i) {
328 			va_range = ctx->va_range[i];
329 			seq_printf(s, "   va_range %d\n", i);
330 			seq_puts(s, "---------------------\n");
331 			mutex_lock(&va_range->lock);
332 			list_for_each_entry(va_block, &va_range->list, node) {
333 				seq_printf(s, "%#16llx - %#16llx (%#llx)\n",
334 					   va_block->start, va_block->end,
335 					   va_block->size);
336 			}
337 			mutex_unlock(&va_range->lock);
338 			seq_puts(s, "\n");
339 		}
340 		hl_ctx_put(ctx);
341 	}
342 
343 	if (!once)
344 		seq_puts(s, "\n");
345 
346 	return 0;
347 }
348 
349 static int userptr_lookup_show(struct seq_file *s, void *data)
350 {
351 	struct hl_debugfs_entry *entry = s->private;
352 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
353 	struct scatterlist *sg;
354 	struct hl_userptr *userptr;
355 	bool first = true;
356 	u64 total_npages, npages, sg_start, sg_end;
357 	dma_addr_t dma_addr;
358 	int i;
359 
360 	spin_lock(&dev_entry->userptr_spinlock);
361 
362 	list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
363 		if (dev_entry->userptr_lookup >= userptr->addr &&
364 		dev_entry->userptr_lookup < userptr->addr + userptr->size) {
365 			total_npages = 0;
366 			for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
367 				npages = hl_get_sg_info(sg, &dma_addr);
368 				sg_start = userptr->addr +
369 					total_npages * PAGE_SIZE;
370 				sg_end = userptr->addr +
371 					(total_npages + npages) * PAGE_SIZE;
372 
373 				if (dev_entry->userptr_lookup >= sg_start &&
374 				    dev_entry->userptr_lookup < sg_end) {
375 					dma_addr += (dev_entry->userptr_lookup -
376 							sg_start);
377 					if (first) {
378 						first = false;
379 						seq_puts(s, "\n");
380 						seq_puts(s, " user virtual address         dma address       pid        region start     region size\n");
381 						seq_puts(s, "---------------------------------------------------------------------------------------\n");
382 					}
383 					seq_printf(s, " 0x%-18llx  0x%-16llx  %-8u  0x%-16llx %-12llu\n",
384 						dev_entry->userptr_lookup,
385 						(u64)dma_addr, userptr->pid,
386 						userptr->addr, userptr->size);
387 				}
388 				total_npages += npages;
389 			}
390 		}
391 	}
392 
393 	spin_unlock(&dev_entry->userptr_spinlock);
394 
395 	if (!first)
396 		seq_puts(s, "\n");
397 
398 	return 0;
399 }
400 
401 static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,
402 		size_t count, loff_t *f_pos)
403 {
404 	struct seq_file *s = file->private_data;
405 	struct hl_debugfs_entry *entry = s->private;
406 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
407 	ssize_t rc;
408 	u64 value;
409 
410 	rc = kstrtoull_from_user(buf, count, 16, &value);
411 	if (rc)
412 		return rc;
413 
414 	dev_entry->userptr_lookup = value;
415 
416 	return count;
417 }
418 
419 static int mmu_show(struct seq_file *s, void *data)
420 {
421 	struct hl_debugfs_entry *entry = s->private;
422 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
423 	struct hl_device *hdev = dev_entry->hdev;
424 	struct hl_ctx *ctx;
425 	struct hl_mmu_hop_info hops_info = {0};
426 	u64 virt_addr = dev_entry->mmu_addr, phys_addr;
427 	int i;
428 
429 	if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
430 		ctx = hdev->kernel_ctx;
431 	else
432 		ctx = hl_get_compute_ctx(hdev);
433 
434 	if (!ctx) {
435 		dev_err(hdev->dev, "no ctx available\n");
436 		return 0;
437 	}
438 
439 	if (hl_mmu_get_tlb_info(ctx, virt_addr, &hops_info)) {
440 		dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
441 				virt_addr);
442 		goto put_ctx;
443 	}
444 
445 	hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
446 
447 	if (hops_info.scrambled_vaddr &&
448 		(dev_entry->mmu_addr != hops_info.scrambled_vaddr))
449 		seq_printf(s,
450 			"asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",
451 			dev_entry->mmu_asid, dev_entry->mmu_addr,
452 			hops_info.scrambled_vaddr,
453 			hops_info.unscrambled_paddr, phys_addr);
454 	else
455 		seq_printf(s,
456 			"asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",
457 			dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);
458 
459 	for (i = 0 ; i < hops_info.used_hops ; i++) {
460 		seq_printf(s, "hop%d_addr: 0x%llx\n",
461 				i, hops_info.hop_info[i].hop_addr);
462 		seq_printf(s, "hop%d_pte_addr: 0x%llx\n",
463 				i, hops_info.hop_info[i].hop_pte_addr);
464 		seq_printf(s, "hop%d_pte: 0x%llx\n",
465 				i, hops_info.hop_info[i].hop_pte_val);
466 	}
467 
468 put_ctx:
469 	if (dev_entry->mmu_asid != HL_KERNEL_ASID_ID)
470 		hl_ctx_put(ctx);
471 
472 	return 0;
473 }
474 
475 static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
476 		size_t count, loff_t *f_pos)
477 {
478 	struct seq_file *s = file->private_data;
479 	struct hl_debugfs_entry *entry = s->private;
480 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
481 	struct hl_device *hdev = dev_entry->hdev;
482 	char kbuf[MMU_KBUF_SIZE] = {0};
483 	char *c;
484 	ssize_t rc;
485 
486 	if (count > sizeof(kbuf) - 1)
487 		goto err;
488 	if (copy_from_user(kbuf, buf, count))
489 		goto err;
490 	kbuf[count] = 0;
491 
492 	c = strchr(kbuf, ' ');
493 	if (!c)
494 		goto err;
495 	*c = '\0';
496 
497 	rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);
498 	if (rc)
499 		goto err;
500 
501 	if (strncmp(c+1, "0x", 2))
502 		goto err;
503 	rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);
504 	if (rc)
505 		goto err;
506 
507 	return count;
508 
509 err:
510 	dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
511 
512 	return -EINVAL;
513 }
514 
515 static int mmu_ack_error(struct seq_file *s, void *data)
516 {
517 	struct hl_debugfs_entry *entry = s->private;
518 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
519 	struct hl_device *hdev = dev_entry->hdev;
520 	int rc;
521 
522 	if (!dev_entry->mmu_cap_mask) {
523 		dev_err(hdev->dev, "mmu_cap_mask is not set\n");
524 		goto err;
525 	}
526 
527 	rc = hdev->asic_funcs->ack_mmu_errors(hdev, dev_entry->mmu_cap_mask);
528 	if (rc)
529 		goto err;
530 
531 	return 0;
532 err:
533 	return -EINVAL;
534 }
535 
536 static ssize_t mmu_ack_error_value_write(struct file *file,
537 		const char __user *buf,
538 		size_t count, loff_t *f_pos)
539 {
540 	struct seq_file *s = file->private_data;
541 	struct hl_debugfs_entry *entry = s->private;
542 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
543 	struct hl_device *hdev = dev_entry->hdev;
544 	char kbuf[MMU_KBUF_SIZE] = {0};
545 	ssize_t rc;
546 
547 	if (count > sizeof(kbuf) - 1)
548 		goto err;
549 
550 	if (copy_from_user(kbuf, buf, count))
551 		goto err;
552 
553 	kbuf[count] = 0;
554 
555 	if (strncmp(kbuf, "0x", 2))
556 		goto err;
557 
558 	rc = kstrtoull(kbuf, 16, &dev_entry->mmu_cap_mask);
559 	if (rc)
560 		goto err;
561 
562 	return count;
563 err:
564 	dev_err(hdev->dev, "usage: echo <0xmmu_cap_mask > > mmu_error\n");
565 
566 	return -EINVAL;
567 }
568 
569 static int engines_show(struct seq_file *s, void *data)
570 {
571 	struct hl_debugfs_entry *entry = s->private;
572 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
573 	struct hl_device *hdev = dev_entry->hdev;
574 	struct engines_data eng_data;
575 
576 	if (hdev->reset_info.in_reset) {
577 		dev_warn_ratelimited(hdev->dev,
578 				"Can't check device idle during reset\n");
579 		return 0;
580 	}
581 
582 	eng_data.actual_size = 0;
583 	eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
584 	eng_data.buf = vmalloc(eng_data.allocated_buf_size);
585 	if (!eng_data.buf)
586 		return -ENOMEM;
587 
588 	hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
589 
590 	if (eng_data.actual_size > eng_data.allocated_buf_size) {
591 		dev_err(hdev->dev,
592 				"Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
593 				eng_data.actual_size, eng_data.allocated_buf_size);
594 		vfree(eng_data.buf);
595 		return -ENOMEM;
596 	}
597 
598 	seq_write(s, eng_data.buf, eng_data.actual_size);
599 
600 	vfree(eng_data.buf);
601 
602 	return 0;
603 }
604 
605 static ssize_t hl_memory_scrub(struct file *f, const char __user *buf,
606 					size_t count, loff_t *ppos)
607 {
608 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
609 	struct hl_device *hdev = entry->hdev;
610 	u64 val = hdev->memory_scrub_val;
611 	int rc;
612 
613 	if (!hl_device_operational(hdev, NULL)) {
614 		dev_warn_ratelimited(hdev->dev, "Can't scrub memory, device is not operational\n");
615 		return -EIO;
616 	}
617 
618 	mutex_lock(&hdev->fpriv_list_lock);
619 	if (hdev->is_compute_ctx_active) {
620 		mutex_unlock(&hdev->fpriv_list_lock);
621 		dev_err(hdev->dev, "can't scrub dram, context exist\n");
622 		return -EBUSY;
623 	}
624 	hdev->is_in_dram_scrub = true;
625 	mutex_unlock(&hdev->fpriv_list_lock);
626 
627 	rc = hdev->asic_funcs->scrub_device_dram(hdev, val);
628 
629 	mutex_lock(&hdev->fpriv_list_lock);
630 	hdev->is_in_dram_scrub = false;
631 	mutex_unlock(&hdev->fpriv_list_lock);
632 
633 	if (rc)
634 		return rc;
635 	return count;
636 }
637 
638 static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
639 {
640 	struct asic_fixed_properties *prop = &hdev->asic_prop;
641 
642 	if (prop->dram_supports_virtual_memory &&
643 		(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
644 		return true;
645 
646 	if (addr >= prop->pmmu.start_addr &&
647 		addr < prop->pmmu.end_addr)
648 		return true;
649 
650 	if (addr >= prop->pmmu_huge.start_addr &&
651 		addr < prop->pmmu_huge.end_addr)
652 		return true;
653 
654 	return false;
655 }
656 
657 static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr,
658 						u32 size)
659 {
660 	struct asic_fixed_properties *prop = &hdev->asic_prop;
661 	u64 dram_start_addr, dram_end_addr;
662 
663 	if (prop->dram_supports_virtual_memory) {
664 		dram_start_addr = prop->dmmu.start_addr;
665 		dram_end_addr = prop->dmmu.end_addr;
666 	} else {
667 		dram_start_addr = prop->dram_base_address;
668 		dram_end_addr = prop->dram_end_address;
669 	}
670 
671 	if (hl_mem_area_inside_range(addr, size, dram_start_addr,
672 					dram_end_addr))
673 		return true;
674 
675 	if (hl_mem_area_inside_range(addr, size, prop->sram_base_address,
676 					prop->sram_end_address))
677 		return true;
678 
679 	return false;
680 }
681 
682 static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
683 			u64 *phys_addr)
684 {
685 	struct hl_vm_phys_pg_pack *phys_pg_pack;
686 	struct hl_ctx *ctx;
687 	struct hl_vm_hash_node *hnode;
688 	u64 end_address, range_size;
689 	struct hl_userptr *userptr;
690 	enum vm_type *vm_type;
691 	bool valid = false;
692 	int i, rc = 0;
693 
694 	ctx = hl_get_compute_ctx(hdev);
695 
696 	if (!ctx) {
697 		dev_err(hdev->dev, "no ctx available\n");
698 		return -EINVAL;
699 	}
700 
701 	/* Verify address is mapped */
702 	mutex_lock(&ctx->mem_hash_lock);
703 	hash_for_each(ctx->mem_hash, i, hnode, node) {
704 		vm_type = hnode->ptr;
705 
706 		if (*vm_type == VM_TYPE_USERPTR) {
707 			userptr = hnode->ptr;
708 			range_size = userptr->size;
709 		} else {
710 			phys_pg_pack = hnode->ptr;
711 			range_size = phys_pg_pack->total_size;
712 		}
713 
714 		end_address = virt_addr + size;
715 		if ((virt_addr >= hnode->vaddr) &&
716 				(end_address <= hnode->vaddr + range_size)) {
717 			valid = true;
718 			break;
719 		}
720 	}
721 	mutex_unlock(&ctx->mem_hash_lock);
722 
723 	if (!valid) {
724 		dev_err(hdev->dev,
725 			"virt addr 0x%llx is not mapped\n",
726 			virt_addr);
727 		rc = -EINVAL;
728 		goto put_ctx;
729 	}
730 
731 	rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr);
732 	if (rc) {
733 		dev_err(hdev->dev,
734 			"virt addr 0x%llx is not mapped to phys addr\n",
735 			virt_addr);
736 		rc = -EINVAL;
737 	}
738 
739 put_ctx:
740 	hl_ctx_put(ctx);
741 
742 	return rc;
743 }
744 
745 static int hl_access_dev_mem_by_region(struct hl_device *hdev, u64 addr,
746 		u64 *val, enum debugfs_access_type acc_type, bool *found)
747 {
748 	size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?
749 		sizeof(u64) : sizeof(u32);
750 	struct pci_mem_region *mem_reg;
751 	int i;
752 
753 	for (i = 0; i < PCI_REGION_NUMBER; i++) {
754 		mem_reg = &hdev->pci_mem_region[i];
755 		if (!mem_reg->used)
756 			continue;
757 		if (addr >= mem_reg->region_base &&
758 			addr <= mem_reg->region_base + mem_reg->region_size - acc_size) {
759 			*found = true;
760 			return hdev->asic_funcs->access_dev_mem(hdev, i, addr, val, acc_type);
761 		}
762 	}
763 	return 0;
764 }
765 
766 static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val,
767 		enum debugfs_access_type acc_type)
768 {
769 	struct asic_fixed_properties *prop = &hdev->asic_prop;
770 	u64 offset = prop->device_dma_offset_for_host_access;
771 
772 	switch (acc_type) {
773 	case DEBUGFS_READ32:
774 		*val = *(u32 *) phys_to_virt(addr - offset);
775 		break;
776 	case DEBUGFS_WRITE32:
777 		*(u32 *) phys_to_virt(addr - offset) = *val;
778 		break;
779 	case DEBUGFS_READ64:
780 		*val = *(u64 *) phys_to_virt(addr - offset);
781 		break;
782 	case DEBUGFS_WRITE64:
783 		*(u64 *) phys_to_virt(addr - offset) = *val;
784 		break;
785 	default:
786 		dev_err(hdev->dev, "hostmem access-type %d id not supported\n", acc_type);
787 		break;
788 	}
789 }
790 
791 static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
792 				enum debugfs_access_type acc_type)
793 {
794 	size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?
795 		sizeof(u64) : sizeof(u32);
796 	u64 host_start = hdev->asic_prop.host_base_address;
797 	u64 host_end = hdev->asic_prop.host_end_address;
798 	bool user_address, found = false;
799 	int rc;
800 
801 	user_address = hl_is_device_va(hdev, addr);
802 	if (user_address) {
803 		rc = device_va_to_pa(hdev, addr, acc_size, &addr);
804 		if (rc)
805 			return rc;
806 	}
807 
808 	rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);
809 	if (rc) {
810 		dev_err(hdev->dev,
811 			"Failed reading addr %#llx from dev mem (%d)\n",
812 			addr, rc);
813 		return rc;
814 	}
815 
816 	if (found)
817 		return 0;
818 
819 	if (!user_address || device_iommu_mapped(&hdev->pdev->dev)) {
820 		rc = -EINVAL;
821 		goto err;
822 	}
823 
824 	if (addr >= host_start && addr <= host_end - acc_size) {
825 		hl_access_host_mem(hdev, addr, val, acc_type);
826 	} else {
827 		rc = -EINVAL;
828 		goto err;
829 	}
830 
831 	return 0;
832 err:
833 	dev_err(hdev->dev, "invalid addr %#llx\n", addr);
834 	return rc;
835 }
836 
837 static ssize_t hl_data_read32(struct file *f, char __user *buf,
838 					size_t count, loff_t *ppos)
839 {
840 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
841 	struct hl_device *hdev = entry->hdev;
842 	u64 value64, addr = entry->addr;
843 	char tmp_buf[32];
844 	ssize_t rc;
845 	u32 val;
846 
847 	if (hdev->reset_info.in_reset) {
848 		dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
849 		return 0;
850 	}
851 
852 	if (*ppos)
853 		return 0;
854 
855 	rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_READ32);
856 	if (rc)
857 		return rc;
858 
859 	val = value64; /* downcast back to 32 */
860 
861 	sprintf(tmp_buf, "0x%08x\n", val);
862 	return simple_read_from_buffer(buf, count, ppos, tmp_buf,
863 			strlen(tmp_buf));
864 }
865 
866 static ssize_t hl_data_write32(struct file *f, const char __user *buf,
867 					size_t count, loff_t *ppos)
868 {
869 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
870 	struct hl_device *hdev = entry->hdev;
871 	u64 value64, addr = entry->addr;
872 	u32 value;
873 	ssize_t rc;
874 
875 	if (hdev->reset_info.in_reset) {
876 		dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
877 		return 0;
878 	}
879 
880 	rc = kstrtouint_from_user(buf, count, 16, &value);
881 	if (rc)
882 		return rc;
883 
884 	value64 = value;
885 	rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_WRITE32);
886 	if (rc)
887 		return rc;
888 
889 	return count;
890 }
891 
892 static ssize_t hl_data_read64(struct file *f, char __user *buf,
893 					size_t count, loff_t *ppos)
894 {
895 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
896 	struct hl_device *hdev = entry->hdev;
897 	u64 addr = entry->addr;
898 	char tmp_buf[32];
899 	ssize_t rc;
900 	u64 val;
901 
902 	if (hdev->reset_info.in_reset) {
903 		dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
904 		return 0;
905 	}
906 
907 	if (*ppos)
908 		return 0;
909 
910 	rc = hl_access_mem(hdev, addr, &val, DEBUGFS_READ64);
911 	if (rc)
912 		return rc;
913 
914 	sprintf(tmp_buf, "0x%016llx\n", val);
915 	return simple_read_from_buffer(buf, count, ppos, tmp_buf,
916 			strlen(tmp_buf));
917 }
918 
919 static ssize_t hl_data_write64(struct file *f, const char __user *buf,
920 					size_t count, loff_t *ppos)
921 {
922 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
923 	struct hl_device *hdev = entry->hdev;
924 	u64 addr = entry->addr;
925 	u64 value;
926 	ssize_t rc;
927 
928 	if (hdev->reset_info.in_reset) {
929 		dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
930 		return 0;
931 	}
932 
933 	rc = kstrtoull_from_user(buf, count, 16, &value);
934 	if (rc)
935 		return rc;
936 
937 	rc = hl_access_mem(hdev, addr, &value, DEBUGFS_WRITE64);
938 	if (rc)
939 		return rc;
940 
941 	return count;
942 }
943 
944 static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,
945 					size_t count, loff_t *ppos)
946 {
947 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
948 	struct hl_device *hdev = entry->hdev;
949 	u64 addr = entry->addr;
950 	ssize_t rc;
951 	u32 size;
952 
953 	if (hdev->reset_info.in_reset) {
954 		dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n");
955 		return 0;
956 	}
957 	rc = kstrtouint_from_user(buf, count, 16, &size);
958 	if (rc)
959 		return rc;
960 
961 	if (!size) {
962 		dev_err(hdev->dev, "DMA read failed. size can't be 0\n");
963 		return -EINVAL;
964 	}
965 
966 	if (size > SZ_128M) {
967 		dev_err(hdev->dev,
968 			"DMA read failed. size can't be larger than 128MB\n");
969 		return -EINVAL;
970 	}
971 
972 	if (!hl_is_device_internal_memory_va(hdev, addr, size)) {
973 		dev_err(hdev->dev,
974 			"DMA read failed. Invalid 0x%010llx + 0x%08x\n",
975 			addr, size);
976 		return -EINVAL;
977 	}
978 
979 	/* Free the previous allocation, if there was any */
980 	entry->data_dma_blob_desc.size = 0;
981 	vfree(entry->data_dma_blob_desc.data);
982 
983 	entry->data_dma_blob_desc.data = vmalloc(size);
984 	if (!entry->data_dma_blob_desc.data)
985 		return -ENOMEM;
986 
987 	rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size,
988 						entry->data_dma_blob_desc.data);
989 	if (rc) {
990 		dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr);
991 		vfree(entry->data_dma_blob_desc.data);
992 		entry->data_dma_blob_desc.data = NULL;
993 		return -EIO;
994 	}
995 
996 	entry->data_dma_blob_desc.size = size;
997 
998 	return count;
999 }
1000 
1001 static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf,
1002 					size_t count, loff_t *ppos)
1003 {
1004 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1005 	struct hl_device *hdev = entry->hdev;
1006 	u32 size, trig;
1007 	ssize_t rc;
1008 
1009 	if (hdev->reset_info.in_reset) {
1010 		dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n");
1011 		return 0;
1012 	}
1013 	rc = kstrtouint_from_user(buf, count, 10, &trig);
1014 	if (rc)
1015 		return rc;
1016 
1017 	if (trig != 1) {
1018 		dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n");
1019 		return -EINVAL;
1020 	}
1021 
1022 	size = sizeof(struct cpucp_monitor_dump);
1023 
1024 	/* Free the previous allocation, if there was any */
1025 	entry->mon_dump_blob_desc.size = 0;
1026 	vfree(entry->mon_dump_blob_desc.data);
1027 
1028 	entry->mon_dump_blob_desc.data = vmalloc(size);
1029 	if (!entry->mon_dump_blob_desc.data)
1030 		return -ENOMEM;
1031 
1032 	rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data);
1033 	if (rc) {
1034 		dev_err(hdev->dev, "Failed to dump monitors\n");
1035 		vfree(entry->mon_dump_blob_desc.data);
1036 		entry->mon_dump_blob_desc.data = NULL;
1037 		return -EIO;
1038 	}
1039 
1040 	entry->mon_dump_blob_desc.size = size;
1041 
1042 	return count;
1043 }
1044 
1045 static ssize_t hl_get_power_state(struct file *f, char __user *buf,
1046 		size_t count, loff_t *ppos)
1047 {
1048 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1049 	struct hl_device *hdev = entry->hdev;
1050 	char tmp_buf[200];
1051 	int i;
1052 
1053 	if (*ppos)
1054 		return 0;
1055 
1056 	if (hdev->pdev->current_state == PCI_D0)
1057 		i = 1;
1058 	else if (hdev->pdev->current_state == PCI_D3hot)
1059 		i = 2;
1060 	else
1061 		i = 3;
1062 
1063 	sprintf(tmp_buf,
1064 		"current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);
1065 	return simple_read_from_buffer(buf, count, ppos, tmp_buf,
1066 			strlen(tmp_buf));
1067 }
1068 
1069 static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
1070 					size_t count, loff_t *ppos)
1071 {
1072 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1073 	struct hl_device *hdev = entry->hdev;
1074 	u32 value;
1075 	ssize_t rc;
1076 
1077 	rc = kstrtouint_from_user(buf, count, 10, &value);
1078 	if (rc)
1079 		return rc;
1080 
1081 	if (value == 1) {
1082 		pci_set_power_state(hdev->pdev, PCI_D0);
1083 		pci_restore_state(hdev->pdev);
1084 		rc = pci_enable_device(hdev->pdev);
1085 		if (rc < 0)
1086 			return rc;
1087 	} else if (value == 2) {
1088 		pci_save_state(hdev->pdev);
1089 		pci_disable_device(hdev->pdev);
1090 		pci_set_power_state(hdev->pdev, PCI_D3hot);
1091 	} else {
1092 		dev_dbg(hdev->dev, "invalid power state value %u\n", value);
1093 		return -EINVAL;
1094 	}
1095 
1096 	return count;
1097 }
1098 
1099 static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
1100 					size_t count, loff_t *ppos)
1101 {
1102 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1103 	struct hl_device *hdev = entry->hdev;
1104 	char tmp_buf[32];
1105 	u64 val;
1106 	ssize_t rc;
1107 
1108 	if (*ppos)
1109 		return 0;
1110 
1111 	rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
1112 			entry->i2c_reg, entry->i2c_len, &val);
1113 	if (rc) {
1114 		dev_err(hdev->dev,
1115 			"Failed to read from I2C bus %d, addr %d, reg %d, len %d\n",
1116 			entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);
1117 		return rc;
1118 	}
1119 
1120 	sprintf(tmp_buf, "%#02llx\n", val);
1121 	rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
1122 			strlen(tmp_buf));
1123 
1124 	return rc;
1125 }
1126 
1127 static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
1128 					size_t count, loff_t *ppos)
1129 {
1130 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1131 	struct hl_device *hdev = entry->hdev;
1132 	u64 value;
1133 	ssize_t rc;
1134 
1135 	rc = kstrtou64_from_user(buf, count, 16, &value);
1136 	if (rc)
1137 		return rc;
1138 
1139 	rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
1140 			entry->i2c_reg, entry->i2c_len, value);
1141 	if (rc) {
1142 		dev_err(hdev->dev,
1143 			"Failed to write %#02llx to I2C bus %d, addr %d, reg %d, len %d\n",
1144 			value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);
1145 		return rc;
1146 	}
1147 
1148 	return count;
1149 }
1150 
1151 static ssize_t hl_led0_write(struct file *f, const char __user *buf,
1152 					size_t count, loff_t *ppos)
1153 {
1154 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1155 	struct hl_device *hdev = entry->hdev;
1156 	u32 value;
1157 	ssize_t rc;
1158 
1159 	rc = kstrtouint_from_user(buf, count, 10, &value);
1160 	if (rc)
1161 		return rc;
1162 
1163 	value = value ? 1 : 0;
1164 
1165 	hl_debugfs_led_set(hdev, 0, value);
1166 
1167 	return count;
1168 }
1169 
1170 static ssize_t hl_led1_write(struct file *f, const char __user *buf,
1171 					size_t count, loff_t *ppos)
1172 {
1173 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1174 	struct hl_device *hdev = entry->hdev;
1175 	u32 value;
1176 	ssize_t rc;
1177 
1178 	rc = kstrtouint_from_user(buf, count, 10, &value);
1179 	if (rc)
1180 		return rc;
1181 
1182 	value = value ? 1 : 0;
1183 
1184 	hl_debugfs_led_set(hdev, 1, value);
1185 
1186 	return count;
1187 }
1188 
1189 static ssize_t hl_led2_write(struct file *f, const char __user *buf,
1190 					size_t count, loff_t *ppos)
1191 {
1192 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1193 	struct hl_device *hdev = entry->hdev;
1194 	u32 value;
1195 	ssize_t rc;
1196 
1197 	rc = kstrtouint_from_user(buf, count, 10, &value);
1198 	if (rc)
1199 		return rc;
1200 
1201 	value = value ? 1 : 0;
1202 
1203 	hl_debugfs_led_set(hdev, 2, value);
1204 
1205 	return count;
1206 }
1207 
1208 static ssize_t hl_device_read(struct file *f, char __user *buf,
1209 					size_t count, loff_t *ppos)
1210 {
1211 	static const char *help =
1212 		"Valid values: disable, enable, suspend, resume, cpu_timeout\n";
1213 	return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
1214 }
1215 
1216 static ssize_t hl_device_write(struct file *f, const char __user *buf,
1217 				     size_t count, loff_t *ppos)
1218 {
1219 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1220 	struct hl_device *hdev = entry->hdev;
1221 	char data[30] = {0};
1222 
1223 	/* don't allow partial writes */
1224 	if (*ppos != 0)
1225 		return 0;
1226 
1227 	simple_write_to_buffer(data, 29, ppos, buf, count);
1228 
1229 	if (strncmp("disable", data, strlen("disable")) == 0) {
1230 		hdev->disabled = true;
1231 	} else if (strncmp("enable", data, strlen("enable")) == 0) {
1232 		hdev->disabled = false;
1233 	} else if (strncmp("suspend", data, strlen("suspend")) == 0) {
1234 		hdev->asic_funcs->suspend(hdev);
1235 	} else if (strncmp("resume", data, strlen("resume")) == 0) {
1236 		hdev->asic_funcs->resume(hdev);
1237 	} else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
1238 		hdev->device_cpu_disabled = true;
1239 	} else {
1240 		dev_err(hdev->dev,
1241 			"Valid values: disable, enable, suspend, resume, cpu_timeout\n");
1242 		count = -EINVAL;
1243 	}
1244 
1245 	return count;
1246 }
1247 
1248 static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
1249 					size_t count, loff_t *ppos)
1250 {
1251 	return 0;
1252 }
1253 
1254 static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
1255 				     size_t count, loff_t *ppos)
1256 {
1257 	return count;
1258 }
1259 
1260 static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
1261 					size_t count, loff_t *ppos)
1262 {
1263 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1264 	struct hl_device *hdev = entry->hdev;
1265 	char tmp_buf[200];
1266 	ssize_t rc;
1267 
1268 	if (!hdev->asic_prop.configurable_stop_on_err)
1269 		return -EOPNOTSUPP;
1270 
1271 	if (*ppos)
1272 		return 0;
1273 
1274 	sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
1275 	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
1276 			strlen(tmp_buf) + 1);
1277 
1278 	return rc;
1279 }
1280 
1281 static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
1282 				     size_t count, loff_t *ppos)
1283 {
1284 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1285 	struct hl_device *hdev = entry->hdev;
1286 	u32 value;
1287 	ssize_t rc;
1288 
1289 	if (!hdev->asic_prop.configurable_stop_on_err)
1290 		return -EOPNOTSUPP;
1291 
1292 	if (hdev->reset_info.in_reset) {
1293 		dev_warn_ratelimited(hdev->dev,
1294 				"Can't change stop on error during reset\n");
1295 		return 0;
1296 	}
1297 
1298 	rc = kstrtouint_from_user(buf, count, 10, &value);
1299 	if (rc)
1300 		return rc;
1301 
1302 	hdev->stop_on_err = value ? 1 : 0;
1303 
1304 	hl_device_reset(hdev, 0);
1305 
1306 	return count;
1307 }
1308 
1309 static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
1310 					size_t count, loff_t *ppos)
1311 {
1312 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1313 	struct hl_device *hdev = entry->hdev;
1314 
1315 	hdev->asic_funcs->ack_protection_bits_errors(hdev);
1316 
1317 	return 0;
1318 }
1319 
1320 static ssize_t hl_state_dump_read(struct file *f, char __user *buf,
1321 					size_t count, loff_t *ppos)
1322 {
1323 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1324 	ssize_t rc;
1325 
1326 	down_read(&entry->state_dump_sem);
1327 	if (!entry->state_dump[entry->state_dump_head])
1328 		rc = 0;
1329 	else
1330 		rc = simple_read_from_buffer(
1331 			buf, count, ppos,
1332 			entry->state_dump[entry->state_dump_head],
1333 			strlen(entry->state_dump[entry->state_dump_head]));
1334 	up_read(&entry->state_dump_sem);
1335 
1336 	return rc;
1337 }
1338 
1339 static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
1340 					size_t count, loff_t *ppos)
1341 {
1342 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1343 	struct hl_device *hdev = entry->hdev;
1344 	ssize_t rc;
1345 	u32 size;
1346 	int i;
1347 
1348 	rc = kstrtouint_from_user(buf, count, 10, &size);
1349 	if (rc)
1350 		return rc;
1351 
1352 	if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) {
1353 		dev_err(hdev->dev, "Invalid number of dumps to skip\n");
1354 		return -EINVAL;
1355 	}
1356 
1357 	if (entry->state_dump[entry->state_dump_head]) {
1358 		down_write(&entry->state_dump_sem);
1359 		for (i = 0; i < size; ++i) {
1360 			vfree(entry->state_dump[entry->state_dump_head]);
1361 			entry->state_dump[entry->state_dump_head] = NULL;
1362 			if (entry->state_dump_head > 0)
1363 				entry->state_dump_head--;
1364 			else
1365 				entry->state_dump_head =
1366 					ARRAY_SIZE(entry->state_dump) - 1;
1367 		}
1368 		up_write(&entry->state_dump_sem);
1369 	}
1370 
1371 	return count;
1372 }
1373 
1374 static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
1375 					size_t count, loff_t *ppos)
1376 {
1377 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1378 	struct hl_device *hdev = entry->hdev;
1379 	char tmp_buf[200];
1380 	ssize_t rc;
1381 
1382 	if (*ppos)
1383 		return 0;
1384 
1385 	sprintf(tmp_buf, "%d\n",
1386 		jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
1387 	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
1388 			strlen(tmp_buf) + 1);
1389 
1390 	return rc;
1391 }
1392 
1393 static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
1394 				     size_t count, loff_t *ppos)
1395 {
1396 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1397 	struct hl_device *hdev = entry->hdev;
1398 	u32 value;
1399 	ssize_t rc;
1400 
1401 	rc = kstrtouint_from_user(buf, count, 10, &value);
1402 	if (rc)
1403 		return rc;
1404 
1405 	if (value)
1406 		hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
1407 	else
1408 		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
1409 
1410 	return count;
1411 }
1412 
1413 static ssize_t hl_check_razwi_happened(struct file *f, char __user *buf,
1414 					size_t count, loff_t *ppos)
1415 {
1416 	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1417 	struct hl_device *hdev = entry->hdev;
1418 
1419 	hdev->asic_funcs->check_if_razwi_happened(hdev);
1420 
1421 	return 0;
1422 }
1423 
1424 static const struct file_operations hl_mem_scrub_fops = {
1425 	.owner = THIS_MODULE,
1426 	.write = hl_memory_scrub,
1427 };
1428 
1429 static const struct file_operations hl_data32b_fops = {
1430 	.owner = THIS_MODULE,
1431 	.read = hl_data_read32,
1432 	.write = hl_data_write32
1433 };
1434 
1435 static const struct file_operations hl_data64b_fops = {
1436 	.owner = THIS_MODULE,
1437 	.read = hl_data_read64,
1438 	.write = hl_data_write64
1439 };
1440 
1441 static const struct file_operations hl_dma_size_fops = {
1442 	.owner = THIS_MODULE,
1443 	.write = hl_dma_size_write
1444 };
1445 
1446 static const struct file_operations hl_monitor_dump_fops = {
1447 	.owner = THIS_MODULE,
1448 	.write = hl_monitor_dump_trigger
1449 };
1450 
1451 static const struct file_operations hl_i2c_data_fops = {
1452 	.owner = THIS_MODULE,
1453 	.read = hl_i2c_data_read,
1454 	.write = hl_i2c_data_write
1455 };
1456 
1457 static const struct file_operations hl_power_fops = {
1458 	.owner = THIS_MODULE,
1459 	.read = hl_get_power_state,
1460 	.write = hl_set_power_state
1461 };
1462 
1463 static const struct file_operations hl_led0_fops = {
1464 	.owner = THIS_MODULE,
1465 	.write = hl_led0_write
1466 };
1467 
1468 static const struct file_operations hl_led1_fops = {
1469 	.owner = THIS_MODULE,
1470 	.write = hl_led1_write
1471 };
1472 
1473 static const struct file_operations hl_led2_fops = {
1474 	.owner = THIS_MODULE,
1475 	.write = hl_led2_write
1476 };
1477 
1478 static const struct file_operations hl_device_fops = {
1479 	.owner = THIS_MODULE,
1480 	.read = hl_device_read,
1481 	.write = hl_device_write
1482 };
1483 
1484 static const struct file_operations hl_clk_gate_fops = {
1485 	.owner = THIS_MODULE,
1486 	.read = hl_clk_gate_read,
1487 	.write = hl_clk_gate_write
1488 };
1489 
1490 static const struct file_operations hl_stop_on_err_fops = {
1491 	.owner = THIS_MODULE,
1492 	.read = hl_stop_on_err_read,
1493 	.write = hl_stop_on_err_write
1494 };
1495 
1496 static const struct file_operations hl_security_violations_fops = {
1497 	.owner = THIS_MODULE,
1498 	.read = hl_security_violations_read
1499 };
1500 
1501 static const struct file_operations hl_state_dump_fops = {
1502 	.owner = THIS_MODULE,
1503 	.read = hl_state_dump_read,
1504 	.write = hl_state_dump_write
1505 };
1506 
1507 static const struct file_operations hl_timeout_locked_fops = {
1508 	.owner = THIS_MODULE,
1509 	.read = hl_timeout_locked_read,
1510 	.write = hl_timeout_locked_write
1511 };
1512 
1513 static const struct file_operations hl_razwi_check_fops = {
1514 	.owner = THIS_MODULE,
1515 	.read = hl_check_razwi_happened
1516 };
1517 
1518 static const struct hl_info_list hl_debugfs_list[] = {
1519 	{"command_buffers", command_buffers_show, NULL},
1520 	{"command_submission", command_submission_show, NULL},
1521 	{"command_submission_jobs", command_submission_jobs_show, NULL},
1522 	{"userptr", userptr_show, NULL},
1523 	{"vm", vm_show, NULL},
1524 	{"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
1525 	{"mmu", mmu_show, mmu_asid_va_write},
1526 	{"mmu_error", mmu_ack_error, mmu_ack_error_value_write},
1527 	{"engines", engines_show, NULL},
1528 };
1529 
1530 static int hl_debugfs_open(struct inode *inode, struct file *file)
1531 {
1532 	struct hl_debugfs_entry *node = inode->i_private;
1533 
1534 	return single_open(file, node->info_ent->show, node);
1535 }
1536 
1537 static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
1538 		size_t count, loff_t *f_pos)
1539 {
1540 	struct hl_debugfs_entry *node = file->f_inode->i_private;
1541 
1542 	if (node->info_ent->write)
1543 		return node->info_ent->write(file, buf, count, f_pos);
1544 	else
1545 		return -EINVAL;
1546 
1547 }
1548 
1549 static const struct file_operations hl_debugfs_fops = {
1550 	.owner = THIS_MODULE,
1551 	.open = hl_debugfs_open,
1552 	.read = seq_read,
1553 	.write = hl_debugfs_write,
1554 	.llseek = seq_lseek,
1555 	.release = single_release,
1556 };
1557 
1558 static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry, struct dentry *root)
1559 {
1560 	debugfs_create_u8("i2c_bus",
1561 				0644,
1562 				root,
1563 				&dev_entry->i2c_bus);
1564 
1565 	debugfs_create_u8("i2c_addr",
1566 				0644,
1567 				root,
1568 				&dev_entry->i2c_addr);
1569 
1570 	debugfs_create_u8("i2c_reg",
1571 				0644,
1572 				root,
1573 				&dev_entry->i2c_reg);
1574 
1575 	debugfs_create_u8("i2c_len",
1576 				0644,
1577 				root,
1578 				&dev_entry->i2c_len);
1579 
1580 	debugfs_create_file("i2c_data",
1581 				0644,
1582 				root,
1583 				dev_entry,
1584 				&hl_i2c_data_fops);
1585 
1586 	debugfs_create_file("led0",
1587 				0200,
1588 				root,
1589 				dev_entry,
1590 				&hl_led0_fops);
1591 
1592 	debugfs_create_file("led1",
1593 				0200,
1594 				root,
1595 				dev_entry,
1596 				&hl_led1_fops);
1597 
1598 	debugfs_create_file("led2",
1599 				0200,
1600 				root,
1601 				dev_entry,
1602 				&hl_led2_fops);
1603 }
1604 
1605 static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_entry *dev_entry,
1606 				struct dentry *root)
1607 {
1608 	int count = ARRAY_SIZE(hl_debugfs_list);
1609 	struct hl_debugfs_entry *entry;
1610 	int i;
1611 
1612 	debugfs_create_x64("memory_scrub_val",
1613 				0644,
1614 				root,
1615 				&hdev->memory_scrub_val);
1616 
1617 	debugfs_create_file("memory_scrub",
1618 				0200,
1619 				root,
1620 				dev_entry,
1621 				&hl_mem_scrub_fops);
1622 
1623 	debugfs_create_x64("addr",
1624 				0644,
1625 				root,
1626 				&dev_entry->addr);
1627 
1628 	debugfs_create_file("data32",
1629 				0644,
1630 				root,
1631 				dev_entry,
1632 				&hl_data32b_fops);
1633 
1634 	debugfs_create_file("data64",
1635 				0644,
1636 				root,
1637 				dev_entry,
1638 				&hl_data64b_fops);
1639 
1640 	debugfs_create_file("set_power_state",
1641 				0644,
1642 				root,
1643 				dev_entry,
1644 				&hl_power_fops);
1645 
1646 	debugfs_create_file("device",
1647 				0644,
1648 				root,
1649 				dev_entry,
1650 				&hl_device_fops);
1651 
1652 	debugfs_create_file("clk_gate",
1653 				0644,
1654 				root,
1655 				dev_entry,
1656 				&hl_clk_gate_fops);
1657 
1658 	debugfs_create_file("stop_on_err",
1659 				0644,
1660 				root,
1661 				dev_entry,
1662 				&hl_stop_on_err_fops);
1663 
1664 	debugfs_create_file("dump_security_violations",
1665 				0400,
1666 				root,
1667 				dev_entry,
1668 				&hl_security_violations_fops);
1669 
1670 	debugfs_create_file("dump_razwi_events",
1671 				0400,
1672 				root,
1673 				dev_entry,
1674 				&hl_razwi_check_fops);
1675 
1676 	debugfs_create_file("dma_size",
1677 				0200,
1678 				root,
1679 				dev_entry,
1680 				&hl_dma_size_fops);
1681 
1682 	debugfs_create_blob("data_dma",
1683 				0400,
1684 				root,
1685 				&dev_entry->data_dma_blob_desc);
1686 
1687 	debugfs_create_file("monitor_dump_trig",
1688 				0200,
1689 				root,
1690 				dev_entry,
1691 				&hl_monitor_dump_fops);
1692 
1693 	debugfs_create_blob("monitor_dump",
1694 				0400,
1695 				root,
1696 				&dev_entry->mon_dump_blob_desc);
1697 
1698 	debugfs_create_x8("skip_reset_on_timeout",
1699 				0644,
1700 				root,
1701 				&hdev->reset_info.skip_reset_on_timeout);
1702 
1703 	debugfs_create_file("state_dump",
1704 				0644,
1705 				root,
1706 				dev_entry,
1707 				&hl_state_dump_fops);
1708 
1709 	debugfs_create_file("timeout_locked",
1710 				0644,
1711 				root,
1712 				dev_entry,
1713 				&hl_timeout_locked_fops);
1714 
1715 	debugfs_create_u32("device_release_watchdog_timeout",
1716 				0644,
1717 				root,
1718 				&hdev->device_release_watchdog_timeout_sec);
1719 
1720 	debugfs_create_u16("server_type",
1721 				0444,
1722 				root,
1723 				&hdev->asic_prop.server_type);
1724 
1725 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
1726 		debugfs_create_file(hl_debugfs_list[i].name,
1727 					0644,
1728 					root,
1729 					entry,
1730 					&hl_debugfs_fops);
1731 		entry->info_ent = &hl_debugfs_list[i];
1732 		entry->dev_entry = dev_entry;
1733 	}
1734 }
1735 
1736 int hl_debugfs_device_init(struct hl_device *hdev)
1737 {
1738 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1739 	int count = ARRAY_SIZE(hl_debugfs_list);
1740 
1741 	dev_entry->hdev = hdev;
1742 	dev_entry->entry_arr = kmalloc_array(count, sizeof(struct hl_debugfs_entry), GFP_KERNEL);
1743 	if (!dev_entry->entry_arr)
1744 		return -ENOMEM;
1745 
1746 	dev_entry->data_dma_blob_desc.size = 0;
1747 	dev_entry->data_dma_blob_desc.data = NULL;
1748 	dev_entry->mon_dump_blob_desc.size = 0;
1749 	dev_entry->mon_dump_blob_desc.data = NULL;
1750 
1751 	INIT_LIST_HEAD(&dev_entry->file_list);
1752 	INIT_LIST_HEAD(&dev_entry->cb_list);
1753 	INIT_LIST_HEAD(&dev_entry->cs_list);
1754 	INIT_LIST_HEAD(&dev_entry->cs_job_list);
1755 	INIT_LIST_HEAD(&dev_entry->userptr_list);
1756 	INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
1757 	mutex_init(&dev_entry->file_mutex);
1758 	init_rwsem(&dev_entry->state_dump_sem);
1759 	spin_lock_init(&dev_entry->cb_spinlock);
1760 	spin_lock_init(&dev_entry->cs_spinlock);
1761 	spin_lock_init(&dev_entry->cs_job_spinlock);
1762 	spin_lock_init(&dev_entry->userptr_spinlock);
1763 	mutex_init(&dev_entry->ctx_mem_hash_mutex);
1764 
1765 	return 0;
1766 }
1767 
1768 void hl_debugfs_device_fini(struct hl_device *hdev)
1769 {
1770 	struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
1771 	int i;
1772 
1773 	mutex_destroy(&entry->ctx_mem_hash_mutex);
1774 	mutex_destroy(&entry->file_mutex);
1775 
1776 	vfree(entry->data_dma_blob_desc.data);
1777 	vfree(entry->mon_dump_blob_desc.data);
1778 
1779 	for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
1780 		vfree(entry->state_dump[i]);
1781 
1782 	kfree(entry->entry_arr);
1783 }
1784 
1785 void hl_debugfs_add_device(struct hl_device *hdev)
1786 {
1787 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1788 
1789 	dev_entry->root = hdev->drm.accel->debugfs_root;
1790 
1791 	add_files_to_device(hdev, dev_entry, dev_entry->root);
1792 
1793 	if (!hdev->asic_prop.fw_security_enabled)
1794 		add_secured_nodes(dev_entry, dev_entry->root);
1795 }
1796 
1797 void hl_debugfs_add_file(struct hl_fpriv *hpriv)
1798 {
1799 	struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
1800 
1801 	mutex_lock(&dev_entry->file_mutex);
1802 	list_add(&hpriv->debugfs_list, &dev_entry->file_list);
1803 	mutex_unlock(&dev_entry->file_mutex);
1804 }
1805 
1806 void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
1807 {
1808 	struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
1809 
1810 	mutex_lock(&dev_entry->file_mutex);
1811 	list_del(&hpriv->debugfs_list);
1812 	mutex_unlock(&dev_entry->file_mutex);
1813 }
1814 
1815 void hl_debugfs_add_cb(struct hl_cb *cb)
1816 {
1817 	struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
1818 
1819 	spin_lock(&dev_entry->cb_spinlock);
1820 	list_add(&cb->debugfs_list, &dev_entry->cb_list);
1821 	spin_unlock(&dev_entry->cb_spinlock);
1822 }
1823 
1824 void hl_debugfs_remove_cb(struct hl_cb *cb)
1825 {
1826 	struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;
1827 
1828 	spin_lock(&dev_entry->cb_spinlock);
1829 	list_del(&cb->debugfs_list);
1830 	spin_unlock(&dev_entry->cb_spinlock);
1831 }
1832 
1833 void hl_debugfs_add_cs(struct hl_cs *cs)
1834 {
1835 	struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
1836 
1837 	spin_lock(&dev_entry->cs_spinlock);
1838 	list_add(&cs->debugfs_list, &dev_entry->cs_list);
1839 	spin_unlock(&dev_entry->cs_spinlock);
1840 }
1841 
1842 void hl_debugfs_remove_cs(struct hl_cs *cs)
1843 {
1844 	struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;
1845 
1846 	spin_lock(&dev_entry->cs_spinlock);
1847 	list_del(&cs->debugfs_list);
1848 	spin_unlock(&dev_entry->cs_spinlock);
1849 }
1850 
1851 void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
1852 {
1853 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1854 
1855 	spin_lock(&dev_entry->cs_job_spinlock);
1856 	list_add(&job->debugfs_list, &dev_entry->cs_job_list);
1857 	spin_unlock(&dev_entry->cs_job_spinlock);
1858 }
1859 
1860 void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
1861 {
1862 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1863 
1864 	spin_lock(&dev_entry->cs_job_spinlock);
1865 	list_del(&job->debugfs_list);
1866 	spin_unlock(&dev_entry->cs_job_spinlock);
1867 }
1868 
1869 void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
1870 {
1871 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1872 
1873 	spin_lock(&dev_entry->userptr_spinlock);
1874 	list_add(&userptr->debugfs_list, &dev_entry->userptr_list);
1875 	spin_unlock(&dev_entry->userptr_spinlock);
1876 }
1877 
1878 void hl_debugfs_remove_userptr(struct hl_device *hdev,
1879 				struct hl_userptr *userptr)
1880 {
1881 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1882 
1883 	spin_lock(&dev_entry->userptr_spinlock);
1884 	list_del(&userptr->debugfs_list);
1885 	spin_unlock(&dev_entry->userptr_spinlock);
1886 }
1887 
1888 void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
1889 {
1890 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1891 
1892 	mutex_lock(&dev_entry->ctx_mem_hash_mutex);
1893 	list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
1894 	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
1895 }
1896 
1897 void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
1898 {
1899 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1900 
1901 	mutex_lock(&dev_entry->ctx_mem_hash_mutex);
1902 	list_del(&ctx->debugfs_list);
1903 	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
1904 }
1905 
1906 /**
1907  * hl_debugfs_set_state_dump - register state dump making it accessible via
1908  *                             debugfs
1909  * @hdev: pointer to the device structure
1910  * @data: the actual dump data
1911  * @length: the length of the data
1912  */
1913 void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
1914 					unsigned long length)
1915 {
1916 	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
1917 
1918 	down_write(&dev_entry->state_dump_sem);
1919 
1920 	dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %
1921 					ARRAY_SIZE(dev_entry->state_dump);
1922 	vfree(dev_entry->state_dump[dev_entry->state_dump_head]);
1923 	dev_entry->state_dump[dev_entry->state_dump_head] = data;
1924 
1925 	up_write(&dev_entry->state_dump_sem);
1926 }
1927