1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2021 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8 #include "habanalabs.h"
9 #include "hldio.h"
10 #include "../include/hw_ip/mmu/mmu_general.h"
11
12 #include <linux/pci.h>
13 #include <linux/uaccess.h>
14 #include <linux/vmalloc.h>
15 #include <linux/iommu.h>
16
17 #define MMU_ADDR_BUF_SIZE 40
18 #define MMU_ASID_BUF_SIZE 10
19 #define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
20 #define I2C_MAX_TRANSACTION_LEN 8
21
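/*
 * The I2C and LED debugfs helpers below do not drive the hardware directly;
 * each request is packed into a CPU-CP packet and handed to the device CPU
 * firmware via send_cpu_message(), which performs the actual transaction.
 */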
22 static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
23 u8 i2c_reg, u8 i2c_len, u64 *val)
24 {
25 struct cpucp_packet pkt;
26 int rc;
27
28 if (!hl_device_operational(hdev, NULL))
29 return -EBUSY;
30
31 if (i2c_len > I2C_MAX_TRANSACTION_LEN) {
32 dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",
33 i2c_len, I2C_MAX_TRANSACTION_LEN);
34 return -EINVAL;
35 }
36
37 memset(&pkt, 0, sizeof(pkt));
38
39 pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_RD <<
40 CPUCP_PKT_CTL_OPCODE_SHIFT);
41 pkt.i2c_bus = i2c_bus;
42 pkt.i2c_addr = i2c_addr;
43 pkt.i2c_reg = i2c_reg;
44 pkt.i2c_len = i2c_len;
45
46 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, val);
47 if (rc && rc != -EAGAIN)
48 dev_err(hdev->dev, "Failed to read from I2C, error %d\n", rc);
49
50 return rc;
51 }
52
53 static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
54 u8 i2c_reg, u8 i2c_len, u64 val)
55 {
56 struct cpucp_packet pkt;
57 int rc;
58
59 if (!hl_device_operational(hdev, NULL))
60 return -EBUSY;
61
62 if (i2c_len > I2C_MAX_TRANSACTION_LEN) {
63 dev_err(hdev->dev, "I2C transaction length %u, exceeds maximum of %u\n",
64 i2c_len, I2C_MAX_TRANSACTION_LEN);
65 return -EINVAL;
66 }
67
68 memset(&pkt, 0, sizeof(pkt));
69
70 pkt.ctl = cpu_to_le32(CPUCP_PACKET_I2C_WR <<
71 CPUCP_PKT_CTL_OPCODE_SHIFT);
72 pkt.i2c_bus = i2c_bus;
73 pkt.i2c_addr = i2c_addr;
74 pkt.i2c_reg = i2c_reg;
75 pkt.i2c_len = i2c_len;
76 pkt.value = cpu_to_le64(val);
77
78 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
79 if (rc && rc != -EAGAIN)
80 dev_err(hdev->dev, "Failed to write to I2C, error %d\n", rc);
81
82 return rc;
83 }
84
85 static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
86 {
87 struct cpucp_packet pkt;
88 int rc;
89
90 if (!hl_device_operational(hdev, NULL))
91 return;
92
93 memset(&pkt, 0, sizeof(pkt));
94
95 pkt.ctl = cpu_to_le32(CPUCP_PACKET_LED_SET <<
96 CPUCP_PKT_CTL_OPCODE_SHIFT);
97 pkt.led_index = cpu_to_le32(led);
98 pkt.value = cpu_to_le64(state);
99
100 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
101 if (rc && rc != -EAGAIN)
102 dev_err(hdev->dev, "Failed to set LED %d, error %d\n", led, rc);
103 }
104
105 static int command_buffers_show(struct seq_file *s, void *data)
106 {
107 struct hl_debugfs_entry *entry = s->private;
108 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
109 struct hl_cb *cb;
110 bool first = true;
111
112 spin_lock(&dev_entry->cb_spinlock);
113
114 list_for_each_entry(cb, &dev_entry->cb_list, debugfs_list) {
115 if (first) {
116 first = false;
117 seq_puts(s, "\n");
118 seq_puts(s, " CB ID CTX ID CB size CB RefCnt mmap? CS counter\n");
119 seq_puts(s, "---------------------------------------------------------------\n");
120 }
121 seq_printf(s,
122 " %03llu %d 0x%08x %d %d %d\n",
123 cb->buf->handle, cb->ctx->asid, cb->size,
124 kref_read(&cb->buf->refcount),
125 atomic_read(&cb->buf->mmap), atomic_read(&cb->cs_cnt));
126 }
127
128 spin_unlock(&dev_entry->cb_spinlock);
129
130 if (!first)
131 seq_puts(s, "\n");
132
133 return 0;
134 }
135
136 static int command_submission_show(struct seq_file *s, void *data)
137 {
138 struct hl_debugfs_entry *entry = s->private;
139 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
140 struct hl_cs *cs;
141 bool first = true;
142
143 spin_lock(&dev_entry->cs_spinlock);
144
145 list_for_each_entry(cs, &dev_entry->cs_list, debugfs_list) {
146 if (first) {
147 first = false;
148 seq_puts(s, "\n");
149 seq_puts(s, " CS ID CS TYPE CTX ASID CS RefCnt Submitted Completed\n");
150 seq_puts(s, "----------------------------------------------------------------\n");
151 }
152 seq_printf(s,
153 " %llu %d %d %d %d %d\n",
154 cs->sequence, cs->type, cs->ctx->asid,
155 kref_read(&cs->refcount),
156 cs->submitted, cs->completed);
157 }
158
159 spin_unlock(&dev_entry->cs_spinlock);
160
161 if (!first)
162 seq_puts(s, "\n");
163
164 return 0;
165 }
166
167 static int command_submission_jobs_show(struct seq_file *s, void *data)
168 {
169 struct hl_debugfs_entry *entry = s->private;
170 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
171 struct hl_cs_job *job;
172 bool first = true;
173
174 spin_lock(&dev_entry->cs_job_spinlock);
175
176 list_for_each_entry(job, &dev_entry->cs_job_list, debugfs_list) {
177 if (first) {
178 first = false;
179 seq_puts(s, "\n");
180 seq_puts(s, " JOB ID CS ID CS TYPE CTX ASID JOB RefCnt H/W Queue\n");
181 seq_puts(s, "---------------------------------------------------------------\n");
182 }
183 if (job->cs)
184 seq_printf(s,
185 " %02d %llu %d %d %d %d\n",
186 job->id, job->cs->sequence, job->cs->type,
187 job->cs->ctx->asid, kref_read(&job->refcount),
188 job->hw_queue_id);
189 else
190 seq_printf(s,
191 " %02d 0 0 %d %d %d\n",
192 job->id, HL_KERNEL_ASID_ID,
193 kref_read(&job->refcount), job->hw_queue_id);
194 }
195
196 spin_unlock(&dev_entry->cs_job_spinlock);
197
198 if (!first)
199 seq_puts(s, "\n");
200
201 return 0;
202 }
203
204 static int userptr_show(struct seq_file *s, void *data)
205 {
206 struct hl_debugfs_entry *entry = s->private;
207 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
208 struct hl_userptr *userptr;
209 char dma_dir[4][30] = {"DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
210 "DMA_FROM_DEVICE", "DMA_NONE"};
211 bool first = true;
212
213 spin_lock(&dev_entry->userptr_spinlock);
214
215 list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
216 if (first) {
217 first = false;
218 seq_puts(s, "\n");
219 seq_puts(s, " pid user virtual address size dma dir\n");
220 seq_puts(s, "----------------------------------------------------------\n");
221 }
222 seq_printf(s, " %-7d 0x%-14llx %-10llu %-30s\n",
223 userptr->pid, userptr->addr, userptr->size,
224 dma_dir[userptr->dir]);
225 }
226
227 spin_unlock(&dev_entry->userptr_spinlock);
228
229 if (!first)
230 seq_puts(s, "\n");
231
232 return 0;
233 }
234
235 static int vm_show(struct seq_file *s, void *data)
236 {
237 struct hl_debugfs_entry *entry = s->private;
238 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
239 struct hl_vm_hw_block_list_node *lnode;
240 struct hl_ctx *ctx;
241 struct hl_vm *vm;
242 struct hl_vm_hash_node *hnode;
243 struct hl_userptr *userptr;
244 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
245 struct hl_va_range *va_range;
246 struct hl_vm_va_block *va_block;
247 enum vm_type *vm_type;
248 bool once = true;
249 u64 j;
250 int i;
251
252 mutex_lock(&dev_entry->ctx_mem_hash_mutex);
253
254 list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
255 once = false;
256 seq_puts(s, "\n\n----------------------------------------------------");
257 seq_puts(s, "\n----------------------------------------------------\n\n");
258 seq_printf(s, "ctx asid: %u\n", ctx->asid);
259
260 seq_puts(s, "\nmappings:\n\n");
261 seq_puts(s, " virtual address size handle\n");
262 seq_puts(s, "----------------------------------------------------\n");
263 mutex_lock(&ctx->mem_hash_lock);
264 hash_for_each(ctx->mem_hash, i, hnode, node) {
265 vm_type = hnode->ptr;
266
267 if (*vm_type == VM_TYPE_USERPTR) {
268 userptr = hnode->ptr;
269 seq_printf(s,
270 " 0x%-14llx %-10llu\n",
271 hnode->vaddr, userptr->size);
272 } else {
273 phys_pg_pack = hnode->ptr;
274 seq_printf(s,
275 " 0x%-14llx %-10llu %-4u\n",
276 hnode->vaddr, phys_pg_pack->total_size,
277 phys_pg_pack->handle);
278 }
279 }
280 mutex_unlock(&ctx->mem_hash_lock);
281
282 if (ctx->asid != HL_KERNEL_ASID_ID &&
283 !list_empty(&ctx->hw_block_mem_list)) {
284 seq_puts(s, "\nhw_block mappings:\n\n");
285 seq_puts(s,
286 " virtual address block size mapped size HW block id\n");
287 seq_puts(s,
288 "---------------------------------------------------------------\n");
289 mutex_lock(&ctx->hw_block_list_lock);
290 list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {
291 seq_printf(s,
292 " 0x%-14lx %-6u %-6u %-9u\n",
293 lnode->vaddr, lnode->block_size, lnode->mapped_size,
294 lnode->id);
295 }
296 mutex_unlock(&ctx->hw_block_list_lock);
297 }
298
299 vm = &ctx->hdev->vm;
300 spin_lock(&vm->idr_lock);
301
302 if (!idr_is_empty(&vm->phys_pg_pack_handles))
303 seq_puts(s, "\n\nallocations:\n");
304
305 idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_pack, i) {
306 if (phys_pg_pack->asid != ctx->asid)
307 continue;
308
309 seq_printf(s, "\nhandle: %u\n", phys_pg_pack->handle);
310 seq_printf(s, "page size: %u\n\n",
311 phys_pg_pack->page_size);
312 seq_puts(s, " physical address\n");
313 seq_puts(s, "---------------------\n");
314 for (j = 0 ; j < phys_pg_pack->npages ; j++) {
315 seq_printf(s, " 0x%-14llx\n",
316 phys_pg_pack->pages[j]);
317 }
318 }
319 spin_unlock(&vm->idr_lock);
320
321 }
322
323 mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
324
325 ctx = hl_get_compute_ctx(dev_entry->hdev);
326 if (ctx) {
327 seq_puts(s, "\nVA ranges:\n\n");
328 for (i = HL_VA_RANGE_TYPE_HOST ; i < HL_VA_RANGE_TYPE_MAX ; ++i) {
329 va_range = ctx->va_range[i];
330 seq_printf(s, " va_range %d\n", i);
331 seq_puts(s, "---------------------\n");
332 mutex_lock(&va_range->lock);
333 list_for_each_entry(va_block, &va_range->list, node) {
334 seq_printf(s, "%#16llx - %#16llx (%#llx)\n",
335 va_block->start, va_block->end,
336 va_block->size);
337 }
338 mutex_unlock(&va_range->lock);
339 seq_puts(s, "\n");
340 }
341 hl_ctx_put(ctx);
342 }
343
344 if (!once)
345 seq_puts(s, "\n");
346
347 return 0;
348 }
349
350 static int userptr_lookup_show(struct seq_file *s, void *data)
351 {
352 struct hl_debugfs_entry *entry = s->private;
353 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
354 struct scatterlist *sg;
355 struct hl_userptr *userptr;
356 bool first = true;
357 u64 total_npages, npages, sg_start, sg_end;
358 dma_addr_t dma_addr;
359 int i;
360
361 spin_lock(&dev_entry->userptr_spinlock);
362
363 list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
364 if (dev_entry->userptr_lookup >= userptr->addr &&
365 dev_entry->userptr_lookup < userptr->addr + userptr->size) {
366 total_npages = 0;
367 for_each_sgtable_dma_sg(userptr->sgt, sg, i) {
368 npages = hl_get_sg_info(sg, &dma_addr);
369 sg_start = userptr->addr +
370 total_npages * PAGE_SIZE;
371 sg_end = userptr->addr +
372 (total_npages + npages) * PAGE_SIZE;
373
374 if (dev_entry->userptr_lookup >= sg_start &&
375 dev_entry->userptr_lookup < sg_end) {
376 dma_addr += (dev_entry->userptr_lookup -
377 sg_start);
378 if (first) {
379 first = false;
380 seq_puts(s, "\n");
381 seq_puts(s, " user virtual address dma address pid region start region size\n");
382 seq_puts(s, "---------------------------------------------------------------------------------------\n");
383 }
384 seq_printf(s, " 0x%-18llx 0x%-16llx %-8u 0x%-16llx %-12llu\n",
385 dev_entry->userptr_lookup,
386 (u64)dma_addr, userptr->pid,
387 userptr->addr, userptr->size);
388 }
389 total_npages += npages;
390 }
391 }
392 }
393
394 spin_unlock(&dev_entry->userptr_spinlock);
395
396 if (!first)
397 seq_puts(s, "\n");
398
399 return 0;
400 }
401
402 static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,
403 size_t count, loff_t *f_pos)
404 {
405 struct seq_file *s = file->private_data;
406 struct hl_debugfs_entry *entry = s->private;
407 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
408 ssize_t rc;
409 u64 value;
410
411 rc = kstrtoull_from_user(buf, count, 16, &value);
412 if (rc)
413 return rc;
414
415 dev_entry->userptr_lookup = value;
416
417 return count;
418 }
419
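/*
 * Dump the MMU translation (hop table walk) for the <asid, va> pair that was
 * written to the "mmu" debugfs node via mmu_asid_va_write().
 */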
420 static int mmu_show(struct seq_file *s, void *data)
421 {
422 struct hl_debugfs_entry *entry = s->private;
423 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
424 struct hl_device *hdev = dev_entry->hdev;
425 struct hl_ctx *ctx;
426 struct hl_mmu_hop_info hops_info = {0};
427 u64 virt_addr = dev_entry->mmu_addr, phys_addr;
428 int i;
429
430 if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
431 ctx = hdev->kernel_ctx;
432 else
433 ctx = hl_get_compute_ctx(hdev);
434
435 if (!ctx) {
436 dev_err(hdev->dev, "no ctx available\n");
437 return 0;
438 }
439
440 if (hl_mmu_get_tlb_info(ctx, virt_addr, &hops_info)) {
441 dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
442 virt_addr);
443 goto put_ctx;
444 }
445
446 hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
447
448 if (hops_info.scrambled_vaddr &&
449 (dev_entry->mmu_addr != hops_info.scrambled_vaddr))
450 seq_printf(s,
451 "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",
452 dev_entry->mmu_asid, dev_entry->mmu_addr,
453 hops_info.scrambled_vaddr,
454 hops_info.unscrambled_paddr, phys_addr);
455 else
456 seq_printf(s,
457 "asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",
458 dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);
459
460 for (i = 0 ; i < hops_info.used_hops ; i++) {
461 seq_printf(s, "hop%d_addr: 0x%llx\n",
462 i, hops_info.hop_info[i].hop_addr);
463 seq_printf(s, "hop%d_pte_addr: 0x%llx\n",
464 i, hops_info.hop_info[i].hop_pte_addr);
465 seq_printf(s, "hop%d_pte: 0x%llx\n",
466 i, hops_info.hop_info[i].hop_pte_val);
467 }
468
469 put_ctx:
470 if (dev_entry->mmu_asid != HL_KERNEL_ASID_ID)
471 hl_ctx_put(ctx);
472
473 return 0;
474 }
475
476 static ssize_t mmu_asid_va_write(struct file *file, const char __user *buf,
477 size_t count, loff_t *f_pos)
478 {
479 struct seq_file *s = file->private_data;
480 struct hl_debugfs_entry *entry = s->private;
481 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
482 struct hl_device *hdev = dev_entry->hdev;
483 char kbuf[MMU_KBUF_SIZE] = {0};
484 char *c;
485 ssize_t rc;
486
487 if (count > sizeof(kbuf) - 1)
488 goto err;
489 if (copy_from_user(kbuf, buf, count))
490 goto err;
491 kbuf[count] = 0;
492
493 c = strchr(kbuf, ' ');
494 if (!c)
495 goto err;
496 *c = '\0';
497
498 rc = kstrtouint(kbuf, 10, &dev_entry->mmu_asid);
499 if (rc)
500 goto err;
501
502 if (strncmp(c+1, "0x", 2))
503 goto err;
504 rc = kstrtoull(c+3, 16, &dev_entry->mmu_addr);
505 if (rc)
506 goto err;
507
508 return count;
509
510 err:
511 dev_err(hdev->dev, "usage: echo <asid> <0xaddr> > mmu\n");
512
513 return -EINVAL;
514 }
515
516 static int mmu_ack_error(struct seq_file *s, void *data)
517 {
518 struct hl_debugfs_entry *entry = s->private;
519 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
520 struct hl_device *hdev = dev_entry->hdev;
521 int rc;
522
523 if (!dev_entry->mmu_cap_mask) {
524 dev_err(hdev->dev, "mmu_cap_mask is not set\n");
525 goto err;
526 }
527
528 rc = hdev->asic_funcs->ack_mmu_errors(hdev, dev_entry->mmu_cap_mask);
529 if (rc)
530 goto err;
531
532 return 0;
533 err:
534 return -EINVAL;
535 }
536
537 static ssize_t mmu_ack_error_value_write(struct file *file,
538 const char __user *buf,
539 size_t count, loff_t *f_pos)
540 {
541 struct seq_file *s = file->private_data;
542 struct hl_debugfs_entry *entry = s->private;
543 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
544 struct hl_device *hdev = dev_entry->hdev;
545 char kbuf[MMU_KBUF_SIZE] = {0};
546 ssize_t rc;
547
548 if (count > sizeof(kbuf) - 1)
549 goto err;
550
551 if (copy_from_user(kbuf, buf, count))
552 goto err;
553
554 kbuf[count] = 0;
555
556 if (strncmp(kbuf, "0x", 2))
557 goto err;
558
559 rc = kstrtoull(kbuf, 16, &dev_entry->mmu_cap_mask);
560 if (rc)
561 goto err;
562
563 return count;
564 err:
565 dev_err(hdev->dev, "usage: echo <0xmmu_cap_mask > > mmu_error\n");
566
567 return -EINVAL;
568 }
569
570 static int engines_show(struct seq_file *s, void *data)
571 {
572 struct hl_debugfs_entry *entry = s->private;
573 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
574 struct hl_device *hdev = dev_entry->hdev;
575 struct engines_data eng_data;
576
577 if (hdev->reset_info.in_reset) {
578 dev_warn_ratelimited(hdev->dev,
579 "Can't check device idle during reset\n");
580 return 0;
581 }
582
583 eng_data.actual_size = 0;
584 eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
585 eng_data.buf = vmalloc(eng_data.allocated_buf_size);
586 if (!eng_data.buf)
587 return -ENOMEM;
588
589 hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
590
591 if (eng_data.actual_size > eng_data.allocated_buf_size) {
592 dev_err(hdev->dev,
593 "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
594 eng_data.actual_size, eng_data.allocated_buf_size);
595 vfree(eng_data.buf);
596 return -ENOMEM;
597 }
598
599 seq_write(s, eng_data.buf, eng_data.actual_size);
600
601 vfree(eng_data.buf);
602
603 return 0;
604 }
605
606 #ifdef CONFIG_HL_HLDIO
607 /* DIO debugfs functions following the standard pattern */
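/*
 * Hypothetical usage example (the fd and device VA below are placeholders,
 * not values taken from a real session):
 *   echo "fd=5 va=0x20000000 off=0 len=1048576" > dio_ssd2hl
 *   cat dio_stats
 * va, off and len must all be page-aligned (4KB), as enforced in
 * dio_ssd2hl_write().
 */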
608 static int dio_ssd2hl_show(struct seq_file *s, void *data)
609 {
610 struct hl_debugfs_entry *entry = s->private;
611 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
612 struct hl_device *hdev = dev_entry->hdev;
613
614 if (!hdev->asic_prop.supports_nvme) {
615 seq_puts(s, "NVMe Direct I/O not supported\\n");
616 return 0;
617 }
618
619 seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n");
620 seq_printf(s, "Last transfer: %zu bytes\\n", dev_entry->dio_stats.last_len_read);
621 seq_puts(s, "Note: All parameters must be page-aligned (4KB)\\n");
622
623 return 0;
624 }
625
626 static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf,
627 size_t count, loff_t *f_pos)
628 {
629 struct seq_file *s = file->private_data;
630 struct hl_debugfs_entry *entry = s->private;
631 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
632 struct hl_device *hdev = dev_entry->hdev;
633 struct hl_ctx *ctx = hdev->kernel_ctx;
634 char kbuf[128];
635 u64 device_va = 0, off_bytes = 0, len_bytes = 0;
636 u32 fd = 0;
637 size_t len_read = 0;
638 int rc, parsed;
639
640 if (!hdev->asic_prop.supports_nvme)
641 return -EOPNOTSUPP;
642
643 if (count >= sizeof(kbuf))
644 return -EINVAL;
645
646 if (copy_from_user(kbuf, buf, count))
647 return -EFAULT;
648
649 kbuf[count] = 0;
650
651 /* Parse: fd=N va=0xADDR off=N len=N */
652 parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu",
653 &fd, &device_va, &off_bytes, &len_bytes);
654 if (parsed != 4) {
655 dev_err(hdev->dev, "Invalid format. Expected: fd=N va=0xADDR off=N len=N\n");
656 return -EINVAL;
657 }
658
659 /* Validate file descriptor */
660 if (fd == 0) {
661 dev_err(hdev->dev, "Invalid file descriptor: %u\n", fd);
662 return -EINVAL;
663 }
664
665 /* Validate alignment requirements */
666 if (!IS_ALIGNED(device_va, PAGE_SIZE) ||
667 !IS_ALIGNED(off_bytes, PAGE_SIZE) ||
668 !IS_ALIGNED(len_bytes, PAGE_SIZE)) {
669 dev_err(hdev->dev,
670 "All parameters must be page-aligned (4KB)\\n");
671 return -EINVAL;
672 }
673
674 /* Validate transfer size */
675 if (len_bytes == 0 || len_bytes > SZ_1G) {
676 dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\n",
677 len_bytes);
678 return -EINVAL;
679 }
680
681 dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\n",
682 fd, device_va, off_bytes, len_bytes);
683
684 rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read);
685 if (rc < 0) {
686 dev_entry->dio_stats.failed_ops++;
687 dev_err(hdev->dev, "SSD2HL operation failed: %d\n", rc);
688 return rc;
689 }
690
691 /* Update statistics */
692 dev_entry->dio_stats.total_ops++;
693 dev_entry->dio_stats.successful_ops++;
694 dev_entry->dio_stats.bytes_transferred += len_read;
695 dev_entry->dio_stats.last_len_read = len_read;
696
697 dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\n", len_read);
698
699 return count;
700 }
701
702 static int dio_hl2ssd_show(struct seq_file *s, void *data)
703 {
704 seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\\n");
705 return 0;
706 }
707
708 static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf,
709 size_t count, loff_t *f_pos)
710 {
711 struct seq_file *s = file->private_data;
712 struct hl_debugfs_entry *entry = s->private;
713 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
714 struct hl_device *hdev = dev_entry->hdev;
715
716 if (!hdev->asic_prop.supports_nvme)
717 return -EOPNOTSUPP;
718
719 dev_dbg(hdev->dev, "HL2SSD operation not implemented\n");
720 return -EOPNOTSUPP;
721 }
722
723 static int dio_stats_show(struct seq_file *s, void *data)
724 {
725 struct hl_debugfs_entry *entry = s->private;
726 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
727 struct hl_device *hdev = dev_entry->hdev;
728 struct hl_dio_stats *stats = &dev_entry->dio_stats;
729 u64 avg_bytes_per_op = 0, success_rate = 0;
730
731 if (!hdev->asic_prop.supports_nvme) {
732 seq_puts(s, "NVMe Direct I/O not supported\\n");
733 return 0;
734 }
735
736 if (stats->successful_ops > 0)
737 avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops;
738
739 if (stats->total_ops > 0)
740 success_rate = (stats->successful_ops * 100) / stats->total_ops;
741
742 seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\\n");
743 seq_printf(s, "Total operations: %llu\\n", stats->total_ops);
744 seq_printf(s, "Successful ops: %llu\\n", stats->successful_ops);
745 seq_printf(s, "Failed ops: %llu\\n", stats->failed_ops);
746 seq_printf(s, "Success rate: %llu%%\\n", success_rate);
747 seq_printf(s, "Total bytes: %llu\\n", stats->bytes_transferred);
748 seq_printf(s, "Avg bytes per op: %llu\\n", avg_bytes_per_op);
749 seq_printf(s, "Last transfer: %zu bytes\\n", stats->last_len_read);
750
751 return 0;
752 }
753
754 static int dio_reset_show(struct seq_file *s, void *data)
755 {
756 seq_puts(s, "Write '1' to reset DIO statistics\\n");
757 return 0;
758 }
759
760 static ssize_t dio_reset_write(struct file *file, const char __user *buf,
761 size_t count, loff_t *f_pos)
762 {
763 struct seq_file *s = file->private_data;
764 struct hl_debugfs_entry *entry = s->private;
765 struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
766 struct hl_device *hdev = dev_entry->hdev;
767 char kbuf[8];
768 unsigned long val;
769 int rc;
770
771 if (!hdev->asic_prop.supports_nvme)
772 return -EOPNOTSUPP;
773
774 if (count >= sizeof(kbuf))
775 return -EINVAL;
776
777 if (copy_from_user(kbuf, buf, count))
778 return -EFAULT;
779
780 kbuf[count] = 0;
781
782 rc = kstrtoul(kbuf, 0, &val);
783 if (rc)
784 return rc;
785
786 if (val == 1) {
787 memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
788 dev_dbg(hdev->dev, "DIO statistics reset\n");
789 } else {
790 dev_err(hdev->dev, "Write '1' to reset statistics\n");
791 return -EINVAL;
792 }
793
794 return count;
795 }
796 #endif
797
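/*
 * Writing to the "memory_scrub" node fills the device DRAM with
 * hdev->memory_scrub_val through the ASIC's scrub_device_dram() callback.
 * The request is refused while a compute context is active.
 */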
798 static ssize_t hl_memory_scrub(struct file *f, const char __user *buf,
799 size_t count, loff_t *ppos)
800 {
801 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
802 struct hl_device *hdev = entry->hdev;
803 u64 val = hdev->memory_scrub_val;
804 int rc;
805
806 if (!hl_device_operational(hdev, NULL)) {
807 dev_warn_ratelimited(hdev->dev, "Can't scrub memory, device is not operational\n");
808 return -EIO;
809 }
810
811 mutex_lock(&hdev->fpriv_list_lock);
812 if (hdev->is_compute_ctx_active) {
813 mutex_unlock(&hdev->fpriv_list_lock);
814 dev_err(hdev->dev, "can't scrub dram, context exist\n");
815 return -EBUSY;
816 }
817 hdev->is_in_dram_scrub = true;
818 mutex_unlock(&hdev->fpriv_list_lock);
819
820 rc = hdev->asic_funcs->scrub_device_dram(hdev, val);
821
822 mutex_lock(&hdev->fpriv_list_lock);
823 hdev->is_in_dram_scrub = false;
824 mutex_unlock(&hdev->fpriv_list_lock);
825
826 if (rc)
827 return rc;
828 return count;
829 }
830
831 static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
832 {
833 struct asic_fixed_properties *prop = &hdev->asic_prop;
834
835 if (prop->dram_supports_virtual_memory &&
836 (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
837 return true;
838
839 if (addr >= prop->pmmu.start_addr &&
840 addr < prop->pmmu.end_addr)
841 return true;
842
843 if (addr >= prop->pmmu_huge.start_addr &&
844 addr < prop->pmmu_huge.end_addr)
845 return true;
846
847 return false;
848 }
849
850 static bool hl_is_device_internal_memory_va(struct hl_device *hdev, u64 addr,
851 u32 size)
852 {
853 struct asic_fixed_properties *prop = &hdev->asic_prop;
854 u64 dram_start_addr, dram_end_addr;
855
856 if (prop->dram_supports_virtual_memory) {
857 dram_start_addr = prop->dmmu.start_addr;
858 dram_end_addr = prop->dmmu.end_addr;
859 } else {
860 dram_start_addr = prop->dram_base_address;
861 dram_end_addr = prop->dram_end_address;
862 }
863
864 if (hl_mem_area_inside_range(addr, size, dram_start_addr,
865 dram_end_addr))
866 return true;
867
868 if (hl_mem_area_inside_range(addr, size, prop->sram_base_address,
869 prop->sram_end_address))
870 return true;
871
872 return false;
873 }
874
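/*
 * Translate a user device VA to a physical address. The VA (plus the access
 * size) must fall inside an existing mapping in the compute context's
 * mem_hash, otherwise -EINVAL is returned.
 */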
875 static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
876 u64 *phys_addr)
877 {
878 struct hl_vm_phys_pg_pack *phys_pg_pack;
879 struct hl_ctx *ctx;
880 struct hl_vm_hash_node *hnode;
881 u64 end_address, range_size;
882 struct hl_userptr *userptr;
883 enum vm_type *vm_type;
884 bool valid = false;
885 int i, rc = 0;
886
887 ctx = hl_get_compute_ctx(hdev);
888
889 if (!ctx) {
890 dev_err(hdev->dev, "no ctx available\n");
891 return -EINVAL;
892 }
893
894 /* Verify address is mapped */
895 mutex_lock(&ctx->mem_hash_lock);
896 hash_for_each(ctx->mem_hash, i, hnode, node) {
897 vm_type = hnode->ptr;
898
899 if (*vm_type == VM_TYPE_USERPTR) {
900 userptr = hnode->ptr;
901 range_size = userptr->size;
902 } else {
903 phys_pg_pack = hnode->ptr;
904 range_size = phys_pg_pack->total_size;
905 }
906
907 end_address = virt_addr + size;
908 if ((virt_addr >= hnode->vaddr) &&
909 (end_address <= hnode->vaddr + range_size)) {
910 valid = true;
911 break;
912 }
913 }
914 mutex_unlock(&ctx->mem_hash_lock);
915
916 if (!valid) {
917 dev_err(hdev->dev,
918 "virt addr 0x%llx is not mapped\n",
919 virt_addr);
920 rc = -EINVAL;
921 goto put_ctx;
922 }
923
924 rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr);
925 if (rc) {
926 dev_err(hdev->dev,
927 "virt addr 0x%llx is not mapped to phys addr\n",
928 virt_addr);
929 rc = -EINVAL;
930 }
931
932 put_ctx:
933 hl_ctx_put(ctx);
934
935 return rc;
936 }
937
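/*
 * Try to service the access through one of the device's PCI memory regions.
 * If a used region covers the whole access, *found is set and the ASIC's
 * access_dev_mem() callback performs the read/write; otherwise *found is
 * left as-is and 0 is returned.
 */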
938 static int hl_access_dev_mem_by_region(struct hl_device *hdev, u64 addr,
939 u64 *val, enum debugfs_access_type acc_type, bool *found)
940 {
941 size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?
942 sizeof(u64) : sizeof(u32);
943 struct pci_mem_region *mem_reg;
944 int i;
945
946 for (i = 0; i < PCI_REGION_NUMBER; i++) {
947 mem_reg = &hdev->pci_mem_region[i];
948 if (!mem_reg->used)
949 continue;
950 if (addr >= mem_reg->region_base &&
951 addr <= mem_reg->region_base + mem_reg->region_size - acc_size) {
952 *found = true;
953 return hdev->asic_funcs->access_dev_mem(hdev, i, addr, val, acc_type);
954 }
955 }
956 return 0;
957 }
958
959 static void hl_access_host_mem(struct hl_device *hdev, u64 addr, u64 *val,
960 enum debugfs_access_type acc_type)
961 {
962 struct asic_fixed_properties *prop = &hdev->asic_prop;
963 u64 offset = prop->device_dma_offset_for_host_access;
964
965 switch (acc_type) {
966 case DEBUGFS_READ32:
967 *val = *(u32 *) phys_to_virt(addr - offset);
968 break;
969 case DEBUGFS_WRITE32:
970 *(u32 *) phys_to_virt(addr - offset) = *val;
971 break;
972 case DEBUGFS_READ64:
973 *val = *(u64 *) phys_to_virt(addr - offset);
974 break;
975 case DEBUGFS_WRITE64:
976 *(u64 *) phys_to_virt(addr - offset) = *val;
977 break;
978 default:
979 dev_err(hdev->dev, "hostmem access-type %d id not supported\n", acc_type);
980 break;
981 }
982 }
983
984 static void dump_cfg_access_entry(struct hl_device *hdev,
985 struct hl_debugfs_cfg_access_entry *entry)
986 {
987 char *access_type = "";
988 struct tm tm;
989
990 switch (entry->debugfs_type) {
991 case DEBUGFS_READ32:
992 access_type = "READ32 from";
993 break;
994 case DEBUGFS_WRITE32:
995 access_type = "WRITE32 to";
996 break;
997 case DEBUGFS_READ64:
998 access_type = "READ64 from";
999 break;
1000 case DEBUGFS_WRITE64:
1001 access_type = "WRITE64 to";
1002 break;
1003 default:
1004 dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type);
1005 return;
1006 }
1007
1008 time64_to_tm(entry->seconds_since_epoch, 0, &tm);
1009 dev_info(hdev->dev,
1010 "%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1,
1011 tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr);
1012 }
1013
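/*
 * Print the recent debugfs accesses to the configuration space, newest first.
 * The walk stops at the first entry older than
 * HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC; each printed entry is copied out and
 * dumped outside the lock, then marked as consumed.
 */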
1014 void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev)
1015 {
1016 struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses;
1017 u32 i, head, count = 0;
1018 time64_t entry_time, now;
1019 unsigned long flags;
1020
1021 now = ktime_get_real_seconds();
1022
1023 spin_lock_irqsave(&dbgfs->lock, flags);
1024 head = dbgfs->head;
1025 if (head == 0)
1026 i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
1027 else
1028 i = head - 1;
1029
1030 /* Walk back until timeout or invalid entry */
1031 while (dbgfs->cfg_access_list[i].valid) {
1032 entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch;
1033 /* Stop when entry is older than timeout */
1034 if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC)
1035 break;
1036
1037 /* print single entry under lock */
1038 {
1039 struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i];
1040 /*
1041 * We copy the entry out under lock and then print after
1042 * releasing the lock to minimize time under lock.
1043 */
1044 spin_unlock_irqrestore(&dbgfs->lock, flags);
1045 dump_cfg_access_entry(hdev, &entry);
1046 spin_lock_irqsave(&dbgfs->lock, flags);
1047 }
1048
1049 /* mark consumed */
1050 dbgfs->cfg_access_list[i].valid = false;
1051
1052 if (i == 0)
1053 i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1;
1054 else
1055 i--;
1056 count++;
1057 if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN)
1058 break;
1059 }
1060 spin_unlock_irqrestore(&dbgfs->lock, flags);
1061 }
1062
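/*
 * If the access falls inside the CFG PCI region, record it in a circular
 * history buffer so it can later be reported by
 * hl_debugfs_cfg_access_history_dump().
 */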
1063 static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size,
1064 enum debugfs_access_type access_type)
1065 {
1066 struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses;
1067 struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG];
1068 struct hl_debugfs_cfg_access_entry *new_entry;
1069 unsigned long flags;
1070
1071 /* Check if address is in config memory */
1072 if (addr >= mem_reg->region_base &&
1073 mem_reg->region_size >= access_size &&
1074 addr <= mem_reg->region_base + mem_reg->region_size - access_size) {
1075
1076 spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags);
1077
1078 new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head];
1079 new_entry->seconds_since_epoch = ktime_get_real_seconds();
1080 new_entry->addr = addr;
1081 new_entry->debugfs_type = access_type;
1082 new_entry->valid = true;
1083 dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1)
1084 % HL_DBGFS_CFG_ACCESS_HIST_LEN;
1085
1086 spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags);
1087
1088 }
1089 }
1090
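/*
 * Generic debugfs memory access: a device VA is first translated to a
 * physical address through the MMU, the result is then matched against the
 * PCI memory regions, and finally (for user VAs on setups without an IOMMU)
 * against the host memory range that is reachable by the device.
 */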
1091 static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val,
1092 enum debugfs_access_type acc_type)
1093 {
1094 size_t acc_size = (acc_type == DEBUGFS_READ64 || acc_type == DEBUGFS_WRITE64) ?
1095 sizeof(u64) : sizeof(u32);
1096 u64 host_start = hdev->asic_prop.host_base_address;
1097 u64 host_end = hdev->asic_prop.host_end_address;
1098 bool user_address, found = false;
1099 int rc;
1100
1101 user_address = hl_is_device_va(hdev, addr);
1102 if (user_address) {
1103 rc = device_va_to_pa(hdev, addr, acc_size, &addr);
1104 if (rc)
1105 return rc;
1106 }
1107
1108 check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type);
1109 rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found);
1110 if (rc) {
1111 dev_err(hdev->dev,
1112 "Failed reading addr %#llx from dev mem (%d)\n",
1113 addr, rc);
1114 return rc;
1115 }
1116
1117 if (found)
1118 return 0;
1119
1120 if (!user_address || device_iommu_mapped(&hdev->pdev->dev)) {
1121 rc = -EINVAL;
1122 goto err;
1123 }
1124
1125 if (addr >= host_start && addr <= host_end - acc_size) {
1126 hl_access_host_mem(hdev, addr, val, acc_type);
1127 } else {
1128 rc = -EINVAL;
1129 goto err;
1130 }
1131
1132 return 0;
1133 err:
1134 dev_err(hdev->dev, "invalid addr %#llx\n", addr);
1135 return rc;
1136 }
1137
1138 static ssize_t hl_data_read32(struct file *f, char __user *buf,
1139 size_t count, loff_t *ppos)
1140 {
1141 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1142 struct hl_device *hdev = entry->hdev;
1143 u64 value64, addr = entry->addr;
1144 char tmp_buf[32];
1145 ssize_t rc;
1146 u32 val;
1147
1148 if (hdev->reset_info.in_reset) {
1149 dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
1150 return 0;
1151 }
1152
1153 if (*ppos)
1154 return 0;
1155
1156 rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_READ32);
1157 if (rc)
1158 return rc;
1159
1160 val = value64; /* downcast back to 32 */
1161
1162 sprintf(tmp_buf, "0x%08x\n", val);
1163 return simple_read_from_buffer(buf, count, ppos, tmp_buf,
1164 strlen(tmp_buf));
1165 }
1166
1167 static ssize_t hl_data_write32(struct file *f, const char __user *buf,
1168 size_t count, loff_t *ppos)
1169 {
1170 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1171 struct hl_device *hdev = entry->hdev;
1172 u64 value64, addr = entry->addr;
1173 u32 value;
1174 ssize_t rc;
1175
1176 if (hdev->reset_info.in_reset) {
1177 dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
1178 return 0;
1179 }
1180
1181 rc = kstrtouint_from_user(buf, count, 16, &value);
1182 if (rc)
1183 return rc;
1184
1185 value64 = value;
1186 rc = hl_access_mem(hdev, addr, &value64, DEBUGFS_WRITE32);
1187 if (rc)
1188 return rc;
1189
1190 return count;
1191 }
1192
1193 static ssize_t hl_data_read64(struct file *f, char __user *buf,
1194 size_t count, loff_t *ppos)
1195 {
1196 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1197 struct hl_device *hdev = entry->hdev;
1198 u64 addr = entry->addr;
1199 char tmp_buf[32];
1200 ssize_t rc;
1201 u64 val;
1202
1203 if (hdev->reset_info.in_reset) {
1204 dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
1205 return 0;
1206 }
1207
1208 if (*ppos)
1209 return 0;
1210
1211 rc = hl_access_mem(hdev, addr, &val, DEBUGFS_READ64);
1212 if (rc)
1213 return rc;
1214
1215 sprintf(tmp_buf, "0x%016llx\n", val);
1216 return simple_read_from_buffer(buf, count, ppos, tmp_buf,
1217 strlen(tmp_buf));
1218 }
1219
1220 static ssize_t hl_data_write64(struct file *f, const char __user *buf,
1221 size_t count, loff_t *ppos)
1222 {
1223 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1224 struct hl_device *hdev = entry->hdev;
1225 u64 addr = entry->addr;
1226 u64 value;
1227 ssize_t rc;
1228
1229 if (hdev->reset_info.in_reset) {
1230 dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
1231 return 0;
1232 }
1233
1234 rc = kstrtoull_from_user(buf, count, 16, &value);
1235 if (rc)
1236 return rc;
1237
1238 rc = hl_access_mem(hdev, addr, &value, DEBUGFS_WRITE64);
1239 if (rc)
1240 return rc;
1241
1242 return count;
1243 }
1244
1245 static ssize_t hl_dma_size_write(struct file *f, const char __user *buf,
1246 size_t count, loff_t *ppos)
1247 {
1248 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1249 struct hl_device *hdev = entry->hdev;
1250 u64 addr = entry->addr;
1251 ssize_t rc;
1252 u32 size;
1253
1254 if (hdev->reset_info.in_reset) {
1255 dev_warn_ratelimited(hdev->dev, "Can't DMA during reset\n");
1256 return 0;
1257 }
1258 rc = kstrtouint_from_user(buf, count, 16, &size);
1259 if (rc)
1260 return rc;
1261
1262 if (!size) {
1263 dev_err(hdev->dev, "DMA read failed. size can't be 0\n");
1264 return -EINVAL;
1265 }
1266
1267 if (size > SZ_128M) {
1268 dev_err(hdev->dev,
1269 "DMA read failed. size can't be larger than 128MB\n");
1270 return -EINVAL;
1271 }
1272
1273 if (!hl_is_device_internal_memory_va(hdev, addr, size)) {
1274 dev_err(hdev->dev,
1275 "DMA read failed. Invalid 0x%010llx + 0x%08x\n",
1276 addr, size);
1277 return -EINVAL;
1278 }
1279
1280 /* Free the previous allocation, if there was any */
1281 entry->data_dma_blob_desc.size = 0;
1282 vfree(entry->data_dma_blob_desc.data);
1283
1284 entry->data_dma_blob_desc.data = vmalloc(size);
1285 if (!entry->data_dma_blob_desc.data)
1286 return -ENOMEM;
1287
1288 rc = hdev->asic_funcs->debugfs_read_dma(hdev, addr, size,
1289 entry->data_dma_blob_desc.data);
1290 if (rc) {
1291 dev_err(hdev->dev, "Failed to DMA from 0x%010llx\n", addr);
1292 vfree(entry->data_dma_blob_desc.data);
1293 entry->data_dma_blob_desc.data = NULL;
1294 return -EIO;
1295 }
1296
1297 entry->data_dma_blob_desc.size = size;
1298
1299 return count;
1300 }
1301
1302 static ssize_t hl_monitor_dump_trigger(struct file *f, const char __user *buf,
1303 size_t count, loff_t *ppos)
1304 {
1305 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1306 struct hl_device *hdev = entry->hdev;
1307 u32 size, trig;
1308 ssize_t rc;
1309
1310 if (hdev->reset_info.in_reset) {
1311 dev_warn_ratelimited(hdev->dev, "Can't dump monitors during reset\n");
1312 return 0;
1313 }
1314 rc = kstrtouint_from_user(buf, count, 10, &trig);
1315 if (rc)
1316 return rc;
1317
1318 if (trig != 1) {
1319 dev_err(hdev->dev, "Must write 1 to trigger monitor dump\n");
1320 return -EINVAL;
1321 }
1322
1323 size = sizeof(struct cpucp_monitor_dump);
1324
1325 /* Free the previous allocation, if there was any */
1326 entry->mon_dump_blob_desc.size = 0;
1327 vfree(entry->mon_dump_blob_desc.data);
1328
1329 entry->mon_dump_blob_desc.data = vmalloc(size);
1330 if (!entry->mon_dump_blob_desc.data)
1331 return -ENOMEM;
1332
1333 rc = hdev->asic_funcs->get_monitor_dump(hdev, entry->mon_dump_blob_desc.data);
1334 if (rc) {
1335 dev_err(hdev->dev, "Failed to dump monitors\n");
1336 vfree(entry->mon_dump_blob_desc.data);
1337 entry->mon_dump_blob_desc.data = NULL;
1338 return -EIO;
1339 }
1340
1341 entry->mon_dump_blob_desc.size = size;
1342
1343 return count;
1344 }
1345
1346 static ssize_t hl_get_power_state(struct file *f, char __user *buf,
1347 size_t count, loff_t *ppos)
1348 {
1349 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1350 struct hl_device *hdev = entry->hdev;
1351 char tmp_buf[200];
1352 int i;
1353
1354 if (*ppos)
1355 return 0;
1356
1357 if (hdev->pdev->current_state == PCI_D0)
1358 i = 1;
1359 else if (hdev->pdev->current_state == PCI_D3hot)
1360 i = 2;
1361 else
1362 i = 3;
1363
1364 sprintf(tmp_buf,
1365 "current power state: %d\n1 - D0\n2 - D3hot\n3 - Unknown\n", i);
1366 return simple_read_from_buffer(buf, count, ppos, tmp_buf,
1367 strlen(tmp_buf));
1368 }
1369
1370 static ssize_t hl_set_power_state(struct file *f, const char __user *buf,
1371 size_t count, loff_t *ppos)
1372 {
1373 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1374 struct hl_device *hdev = entry->hdev;
1375 u32 value;
1376 ssize_t rc;
1377
1378 rc = kstrtouint_from_user(buf, count, 10, &value);
1379 if (rc)
1380 return rc;
1381
1382 if (value == 1) {
1383 pci_set_power_state(hdev->pdev, PCI_D0);
1384 pci_restore_state(hdev->pdev);
1385 rc = pci_enable_device(hdev->pdev);
1386 if (rc < 0)
1387 return rc;
1388 } else if (value == 2) {
1389 pci_save_state(hdev->pdev);
1390 pci_disable_device(hdev->pdev);
1391 pci_set_power_state(hdev->pdev, PCI_D3hot);
1392 } else {
1393 dev_dbg(hdev->dev, "invalid power state value %u\n", value);
1394 return -EINVAL;
1395 }
1396
1397 return count;
1398 }
1399
1400 static ssize_t hl_i2c_data_read(struct file *f, char __user *buf,
1401 size_t count, loff_t *ppos)
1402 {
1403 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1404 struct hl_device *hdev = entry->hdev;
1405 char tmp_buf[32];
1406 u64 val;
1407 ssize_t rc;
1408
1409 if (*ppos)
1410 return 0;
1411
1412 rc = hl_debugfs_i2c_read(hdev, entry->i2c_bus, entry->i2c_addr,
1413 entry->i2c_reg, entry->i2c_len, &val);
1414 if (rc) {
1415 dev_err(hdev->dev,
1416 "Failed to read from I2C bus %d, addr %d, reg %d, len %d\n",
1417 entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);
1418 return rc;
1419 }
1420
1421 sprintf(tmp_buf, "%#02llx\n", val);
1422 rc = simple_read_from_buffer(buf, count, ppos, tmp_buf,
1423 strlen(tmp_buf));
1424
1425 return rc;
1426 }
1427
1428 static ssize_t hl_i2c_data_write(struct file *f, const char __user *buf,
1429 size_t count, loff_t *ppos)
1430 {
1431 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1432 struct hl_device *hdev = entry->hdev;
1433 u64 value;
1434 ssize_t rc;
1435
1436 rc = kstrtou64_from_user(buf, count, 16, &value);
1437 if (rc)
1438 return rc;
1439
1440 rc = hl_debugfs_i2c_write(hdev, entry->i2c_bus, entry->i2c_addr,
1441 entry->i2c_reg, entry->i2c_len, value);
1442 if (rc) {
1443 dev_err(hdev->dev,
1444 "Failed to write %#02llx to I2C bus %d, addr %d, reg %d, len %d\n",
1445 value, entry->i2c_bus, entry->i2c_addr, entry->i2c_reg, entry->i2c_len);
1446 return rc;
1447 }
1448
1449 return count;
1450 }
1451
1452 static ssize_t hl_led0_write(struct file *f, const char __user *buf,
1453 size_t count, loff_t *ppos)
1454 {
1455 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1456 struct hl_device *hdev = entry->hdev;
1457 u32 value;
1458 ssize_t rc;
1459
1460 rc = kstrtouint_from_user(buf, count, 10, &value);
1461 if (rc)
1462 return rc;
1463
1464 value = value ? 1 : 0;
1465
1466 hl_debugfs_led_set(hdev, 0, value);
1467
1468 return count;
1469 }
1470
1471 static ssize_t hl_led1_write(struct file *f, const char __user *buf,
1472 size_t count, loff_t *ppos)
1473 {
1474 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1475 struct hl_device *hdev = entry->hdev;
1476 u32 value;
1477 ssize_t rc;
1478
1479 rc = kstrtouint_from_user(buf, count, 10, &value);
1480 if (rc)
1481 return rc;
1482
1483 value = value ? 1 : 0;
1484
1485 hl_debugfs_led_set(hdev, 1, value);
1486
1487 return count;
1488 }
1489
1490 static ssize_t hl_led2_write(struct file *f, const char __user *buf,
1491 size_t count, loff_t *ppos)
1492 {
1493 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1494 struct hl_device *hdev = entry->hdev;
1495 u32 value;
1496 ssize_t rc;
1497
1498 rc = kstrtouint_from_user(buf, count, 10, &value);
1499 if (rc)
1500 return rc;
1501
1502 value = value ? 1 : 0;
1503
1504 hl_debugfs_led_set(hdev, 2, value);
1505
1506 return count;
1507 }
1508
1509 static ssize_t hl_device_read(struct file *f, char __user *buf,
1510 size_t count, loff_t *ppos)
1511 {
1512 static const char *help =
1513 "Valid values: disable, enable, suspend, resume, cpu_timeout\n";
1514 return simple_read_from_buffer(buf, count, ppos, help, strlen(help));
1515 }
1516
1517 static ssize_t hl_device_write(struct file *f, const char __user *buf,
1518 size_t count, loff_t *ppos)
1519 {
1520 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1521 struct hl_device *hdev = entry->hdev;
1522 char data[30] = {0};
1523
1524 /* don't allow partial writes */
1525 if (*ppos != 0)
1526 return 0;
1527
1528 simple_write_to_buffer(data, 29, ppos, buf, count);
1529
1530 if (strncmp("disable", data, strlen("disable")) == 0) {
1531 hdev->disabled = true;
1532 } else if (strncmp("enable", data, strlen("enable")) == 0) {
1533 hdev->disabled = false;
1534 } else if (strncmp("suspend", data, strlen("suspend")) == 0) {
1535 hdev->asic_funcs->suspend(hdev);
1536 } else if (strncmp("resume", data, strlen("resume")) == 0) {
1537 hdev->asic_funcs->resume(hdev);
1538 } else if (strncmp("cpu_timeout", data, strlen("cpu_timeout")) == 0) {
1539 hdev->device_cpu_disabled = true;
1540 } else {
1541 dev_err(hdev->dev,
1542 "Valid values: disable, enable, suspend, resume, cpu_timeout\n");
1543 count = -EINVAL;
1544 }
1545
1546 return count;
1547 }
1548
1549 static ssize_t hl_clk_gate_read(struct file *f, char __user *buf,
1550 size_t count, loff_t *ppos)
1551 {
1552 return 0;
1553 }
1554
1555 static ssize_t hl_clk_gate_write(struct file *f, const char __user *buf,
1556 size_t count, loff_t *ppos)
1557 {
1558 return count;
1559 }
1560
1561 static ssize_t hl_stop_on_err_read(struct file *f, char __user *buf,
1562 size_t count, loff_t *ppos)
1563 {
1564 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1565 struct hl_device *hdev = entry->hdev;
1566 char tmp_buf[200];
1567 ssize_t rc;
1568
1569 if (!hdev->asic_prop.configurable_stop_on_err)
1570 return -EOPNOTSUPP;
1571
1572 if (*ppos)
1573 return 0;
1574
1575 sprintf(tmp_buf, "%d\n", hdev->stop_on_err);
1576 rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
1577 strlen(tmp_buf) + 1);
1578
1579 return rc;
1580 }
1581
1582 static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
1583 size_t count, loff_t *ppos)
1584 {
1585 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1586 struct hl_device *hdev = entry->hdev;
1587 u32 value;
1588 ssize_t rc;
1589
1590 if (!hdev->asic_prop.configurable_stop_on_err)
1591 return -EOPNOTSUPP;
1592
1593 if (hdev->reset_info.in_reset) {
1594 dev_warn_ratelimited(hdev->dev,
1595 "Can't change stop on error during reset\n");
1596 return 0;
1597 }
1598
1599 rc = kstrtouint_from_user(buf, count, 10, &value);
1600 if (rc)
1601 return rc;
1602
1603 hdev->stop_on_err = value ? 1 : 0;
1604
1605 hl_device_reset(hdev, 0);
1606
1607 return count;
1608 }
1609
1610 static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
1611 size_t count, loff_t *ppos)
1612 {
1613 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1614 struct hl_device *hdev = entry->hdev;
1615
1616 hdev->asic_funcs->ack_protection_bits_errors(hdev);
1617
1618 return 0;
1619 }
1620
1621 static ssize_t hl_state_dump_read(struct file *f, char __user *buf,
1622 size_t count, loff_t *ppos)
1623 {
1624 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1625 ssize_t rc;
1626
1627 down_read(&entry->state_dump_sem);
1628 if (!entry->state_dump[entry->state_dump_head])
1629 rc = 0;
1630 else
1631 rc = simple_read_from_buffer(
1632 buf, count, ppos,
1633 entry->state_dump[entry->state_dump_head],
1634 strlen(entry->state_dump[entry->state_dump_head]));
1635 up_read(&entry->state_dump_sem);
1636
1637 return rc;
1638 }
1639
1640 static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
1641 size_t count, loff_t *ppos)
1642 {
1643 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1644 struct hl_device *hdev = entry->hdev;
1645 ssize_t rc;
1646 u32 size;
1647 int i;
1648
1649 rc = kstrtouint_from_user(buf, count, 10, &size);
1650 if (rc)
1651 return rc;
1652
1653 if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) {
1654 dev_err(hdev->dev, "Invalid number of dumps to skip\n");
1655 return -EINVAL;
1656 }
1657
1658 if (entry->state_dump[entry->state_dump_head]) {
1659 down_write(&entry->state_dump_sem);
1660 for (i = 0; i < size; ++i) {
1661 vfree(entry->state_dump[entry->state_dump_head]);
1662 entry->state_dump[entry->state_dump_head] = NULL;
1663 if (entry->state_dump_head > 0)
1664 entry->state_dump_head--;
1665 else
1666 entry->state_dump_head =
1667 ARRAY_SIZE(entry->state_dump) - 1;
1668 }
1669 up_write(&entry->state_dump_sem);
1670 }
1671
1672 return count;
1673 }
1674
1675 static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
1676 size_t count, loff_t *ppos)
1677 {
1678 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1679 struct hl_device *hdev = entry->hdev;
1680 char tmp_buf[200];
1681 ssize_t rc;
1682
1683 if (*ppos)
1684 return 0;
1685
1686 sprintf(tmp_buf, "%d\n",
1687 jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
1688 rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
1689 strlen(tmp_buf) + 1);
1690
1691 return rc;
1692 }
1693
1694 static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
1695 size_t count, loff_t *ppos)
1696 {
1697 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1698 struct hl_device *hdev = entry->hdev;
1699 u32 value;
1700 ssize_t rc;
1701
1702 rc = kstrtouint_from_user(buf, count, 10, &value);
1703 if (rc)
1704 return rc;
1705
1706 if (value)
1707 hdev->timeout_jiffies = secs_to_jiffies(value);
1708 else
1709 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
1710
1711 return count;
1712 }
1713
1714 static ssize_t hl_check_razwi_happened(struct file *f, char __user *buf,
1715 size_t count, loff_t *ppos)
1716 {
1717 struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
1718 struct hl_device *hdev = entry->hdev;
1719
1720 hdev->asic_funcs->check_if_razwi_happened(hdev);
1721
1722 return 0;
1723 }
1724
1725 static const struct file_operations hl_mem_scrub_fops = {
1726 .owner = THIS_MODULE,
1727 .write = hl_memory_scrub,
1728 };
1729
1730 static const struct file_operations hl_data32b_fops = {
1731 .owner = THIS_MODULE,
1732 .read = hl_data_read32,
1733 .write = hl_data_write32
1734 };
1735
1736 static const struct file_operations hl_data64b_fops = {
1737 .owner = THIS_MODULE,
1738 .read = hl_data_read64,
1739 .write = hl_data_write64
1740 };
1741
1742 static const struct file_operations hl_dma_size_fops = {
1743 .owner = THIS_MODULE,
1744 .write = hl_dma_size_write
1745 };
1746
1747 static const struct file_operations hl_monitor_dump_fops = {
1748 .owner = THIS_MODULE,
1749 .write = hl_monitor_dump_trigger
1750 };
1751
1752 static const struct file_operations hl_i2c_data_fops = {
1753 .owner = THIS_MODULE,
1754 .read = hl_i2c_data_read,
1755 .write = hl_i2c_data_write
1756 };
1757
1758 static const struct file_operations hl_power_fops = {
1759 .owner = THIS_MODULE,
1760 .read = hl_get_power_state,
1761 .write = hl_set_power_state
1762 };
1763
1764 static const struct file_operations hl_led0_fops = {
1765 .owner = THIS_MODULE,
1766 .write = hl_led0_write
1767 };
1768
1769 static const struct file_operations hl_led1_fops = {
1770 .owner = THIS_MODULE,
1771 .write = hl_led1_write
1772 };
1773
1774 static const struct file_operations hl_led2_fops = {
1775 .owner = THIS_MODULE,
1776 .write = hl_led2_write
1777 };
1778
1779 static const struct file_operations hl_device_fops = {
1780 .owner = THIS_MODULE,
1781 .read = hl_device_read,
1782 .write = hl_device_write
1783 };
1784
1785 static const struct file_operations hl_clk_gate_fops = {
1786 .owner = THIS_MODULE,
1787 .read = hl_clk_gate_read,
1788 .write = hl_clk_gate_write
1789 };
1790
1791 static const struct file_operations hl_stop_on_err_fops = {
1792 .owner = THIS_MODULE,
1793 .read = hl_stop_on_err_read,
1794 .write = hl_stop_on_err_write
1795 };
1796
1797 static const struct file_operations hl_security_violations_fops = {
1798 .owner = THIS_MODULE,
1799 .read = hl_security_violations_read
1800 };
1801
1802 static const struct file_operations hl_state_dump_fops = {
1803 .owner = THIS_MODULE,
1804 .read = hl_state_dump_read,
1805 .write = hl_state_dump_write
1806 };
1807
1808 static const struct file_operations hl_timeout_locked_fops = {
1809 .owner = THIS_MODULE,
1810 .read = hl_timeout_locked_read,
1811 .write = hl_timeout_locked_write
1812 };
1813
1814 static const struct file_operations hl_razwi_check_fops = {
1815 .owner = THIS_MODULE,
1816 .read = hl_check_razwi_happened
1817 };
1818
1819 static const struct hl_info_list hl_debugfs_list[] = {
1820 {"command_buffers", command_buffers_show, NULL},
1821 {"command_submission", command_submission_show, NULL},
1822 {"command_submission_jobs", command_submission_jobs_show, NULL},
1823 {"userptr", userptr_show, NULL},
1824 {"vm", vm_show, NULL},
1825 {"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
1826 {"mmu", mmu_show, mmu_asid_va_write},
1827 {"mmu_error", mmu_ack_error, mmu_ack_error_value_write},
1828 {"engines", engines_show, NULL},
1829 #ifdef CONFIG_HL_HLDIO
1830 /* DIO entries - only created if NVMe is supported */
1831 {"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write},
1832 {"dio_stats", dio_stats_show, NULL},
1833 {"dio_reset", dio_reset_show, dio_reset_write},
1834 {"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write},
1835 #endif
1836 };
1837
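/*
 * Generic fops for the hl_debugfs_list entries: open() wires the entry's
 * show() callback through seq_file, write() dispatches to the optional
 * per-entry write() callback.
 */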
1838 static int hl_debugfs_open(struct inode *inode, struct file *file)
1839 {
1840 struct hl_debugfs_entry *node = inode->i_private;
1841
1842 return single_open(file, node->info_ent->show, node);
1843 }
1844
hl_debugfs_write(struct file * file,const char __user * buf,size_t count,loff_t * f_pos)1845 static ssize_t hl_debugfs_write(struct file *file, const char __user *buf,
1846 size_t count, loff_t *f_pos)
1847 {
1848 struct hl_debugfs_entry *node = file->f_inode->i_private;
1849
1850 if (node->info_ent->write)
1851 return node->info_ent->write(file, buf, count, f_pos);
1852 else
1853 return -EINVAL;
1854
1855 }
1856
1857 static const struct file_operations hl_debugfs_fops = {
1858 .owner = THIS_MODULE,
1859 .open = hl_debugfs_open,
1860 .read = seq_read,
1861 .write = hl_debugfs_write,
1862 .llseek = seq_lseek,
1863 .release = single_release,
1864 };
1865
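/*
 * Nodes that poke the hardware directly (I2C transactions and LED control).
 * They are only exposed when firmware security is disabled, see
 * hl_debugfs_add_device().
 */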
static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry, struct dentry *root)
{
	debugfs_create_u8("i2c_bus",
				0644,
				root,
				&dev_entry->i2c_bus);

	debugfs_create_u8("i2c_addr",
				0644,
				root,
				&dev_entry->i2c_addr);

	debugfs_create_u8("i2c_reg",
				0644,
				root,
				&dev_entry->i2c_reg);

	debugfs_create_u8("i2c_len",
				0644,
				root,
				&dev_entry->i2c_len);

	debugfs_create_file("i2c_data",
				0644,
				root,
				dev_entry,
				&hl_i2c_data_fops);

	debugfs_create_file("led0",
				0200,
				root,
				dev_entry,
				&hl_led0_fops);

	debugfs_create_file("led1",
				0200,
				root,
				dev_entry,
				&hl_led1_fops);

	debugfs_create_file("led2",
				0200,
				root,
				dev_entry,
				&hl_led2_fops);
}

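/*
 * Create the common debugfs nodes for a device under @root: raw memory and
 * register access, power state, reset/timeout knobs, DMA and monitor dump
 * blobs, and the seq_file entries from hl_debugfs_list. The "dio_*" entries
 * are skipped when the ASIC does not report NVMe support.
 */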
static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_entry *dev_entry,
				struct dentry *root)
{
	int count = ARRAY_SIZE(hl_debugfs_list);
	struct hl_debugfs_entry *entry;
	int i;

	debugfs_create_x64("memory_scrub_val",
				0644,
				root,
				&hdev->memory_scrub_val);

	debugfs_create_file("memory_scrub",
				0200,
				root,
				dev_entry,
				&hl_mem_scrub_fops);

	debugfs_create_x64("addr",
				0644,
				root,
				&dev_entry->addr);

	debugfs_create_file("data32",
				0644,
				root,
				dev_entry,
				&hl_data32b_fops);

	debugfs_create_file("data64",
				0644,
				root,
				dev_entry,
				&hl_data64b_fops);

	debugfs_create_file("set_power_state",
				0644,
				root,
				dev_entry,
				&hl_power_fops);

	debugfs_create_file("device",
				0644,
				root,
				dev_entry,
				&hl_device_fops);

	debugfs_create_file("clk_gate",
				0644,
				root,
				dev_entry,
				&hl_clk_gate_fops);

	debugfs_create_file("stop_on_err",
				0644,
				root,
				dev_entry,
				&hl_stop_on_err_fops);

	debugfs_create_file("dump_security_violations",
				0400,
				root,
				dev_entry,
				&hl_security_violations_fops);

	debugfs_create_file("dump_razwi_events",
				0400,
				root,
				dev_entry,
				&hl_razwi_check_fops);

	debugfs_create_file("dma_size",
				0200,
				root,
				dev_entry,
				&hl_dma_size_fops);

	debugfs_create_blob("data_dma",
				0400,
				root,
				&dev_entry->data_dma_blob_desc);

	debugfs_create_file("monitor_dump_trig",
				0200,
				root,
				dev_entry,
				&hl_monitor_dump_fops);

	debugfs_create_blob("monitor_dump",
				0400,
				root,
				&dev_entry->mon_dump_blob_desc);

	debugfs_create_x8("skip_reset_on_timeout",
				0644,
				root,
				&hdev->reset_info.skip_reset_on_timeout);

	debugfs_create_file("state_dump",
				0644,
				root,
				dev_entry,
				&hl_state_dump_fops);

	debugfs_create_file("timeout_locked",
				0644,
				root,
				dev_entry,
				&hl_timeout_locked_fops);

	debugfs_create_u32("device_release_watchdog_timeout",
				0644,
				root,
				&hdev->device_release_watchdog_timeout_sec);

	debugfs_create_u16("server_type",
				0444,
				root,
				&hdev->asic_prop.server_type);

	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
		/* Skip DIO entries if NVMe is not supported */
		if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 &&
				!hdev->asic_prop.supports_nvme)
			continue;

		debugfs_create_file(hl_debugfs_list[i].name,
					0644,
					root,
					entry,
					&hl_debugfs_fops);
		entry->info_ent = &hl_debugfs_list[i];
		entry->dev_entry = dev_entry;
	}
}

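/**
 * hl_debugfs_device_init - allocate and initialize the per-device debugfs
 *                          bookkeeping (entry array, lists and locks)
 * @hdev: pointer to the device structure
 *
 * Returns 0 on success or -ENOMEM if the entry array cannot be allocated.
 */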
int hl_debugfs_device_init(struct hl_device *hdev)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
	int count = ARRAY_SIZE(hl_debugfs_list);

	dev_entry->hdev = hdev;
	dev_entry->entry_arr = kmalloc_array(count, sizeof(struct hl_debugfs_entry), GFP_KERNEL);
	if (!dev_entry->entry_arr)
		return -ENOMEM;

	dev_entry->data_dma_blob_desc.size = 0;
	dev_entry->data_dma_blob_desc.data = NULL;
	dev_entry->mon_dump_blob_desc.size = 0;
	dev_entry->mon_dump_blob_desc.data = NULL;

	INIT_LIST_HEAD(&dev_entry->file_list);
	INIT_LIST_HEAD(&dev_entry->cb_list);
	INIT_LIST_HEAD(&dev_entry->cs_list);
	INIT_LIST_HEAD(&dev_entry->cs_job_list);
	INIT_LIST_HEAD(&dev_entry->userptr_list);
	INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
	mutex_init(&dev_entry->file_mutex);
	init_rwsem(&dev_entry->state_dump_sem);
	spin_lock_init(&dev_entry->cb_spinlock);
	spin_lock_init(&dev_entry->cs_spinlock);
	spin_lock_init(&dev_entry->cs_job_spinlock);
	spin_lock_init(&dev_entry->userptr_spinlock);
	mutex_init(&dev_entry->ctx_mem_hash_mutex);

	spin_lock_init(&hdev->debugfs_cfg_accesses.lock);
	hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */

#ifdef CONFIG_HL_HLDIO
	/* Initialize DIO statistics */
	memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats));
#endif

	return 0;
}

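/**
 * hl_debugfs_device_fini - free the debugfs bookkeeping allocated by
 *                          hl_debugfs_device_init(), including any state
 *                          dumps and DMA/monitor dump blobs
 * @hdev: pointer to the device structure
 */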
void hl_debugfs_device_fini(struct hl_device *hdev)
{
	struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
	int i;

	mutex_destroy(&entry->ctx_mem_hash_mutex);
	mutex_destroy(&entry->file_mutex);

	vfree(entry->data_dma_blob_desc.data);
	vfree(entry->mon_dump_blob_desc.data);

	for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
		vfree(entry->state_dump[i]);

	kfree(entry->entry_arr);
}

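/**
 * hl_debugfs_add_device - populate the device's debugfs directory
 * @hdev: pointer to the device structure
 *
 * The directory itself is owned by the accel/DRM core; secured (I2C/LED)
 * nodes are added only when firmware security is disabled.
 */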
void hl_debugfs_add_device(struct hl_device *hdev)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	dev_entry->root = hdev->drm.accel->debugfs_root;

	add_files_to_device(hdev, dev_entry, dev_entry->root);

	if (!hdev->asic_prop.fw_security_enabled)
		add_secured_nodes(dev_entry, dev_entry->root);
}

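/*
 * Track per-open file private data so the debugfs show handlers can iterate
 * over the current users of the device. The list is protected by file_mutex.
 */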
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
	struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;

	mutex_lock(&dev_entry->file_mutex);
	list_add(&hpriv->debugfs_list, &dev_entry->file_list);
	mutex_unlock(&dev_entry->file_mutex);
}

void hl_debugfs_remove_file(struct hl_fpriv *hpriv)
{
	struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;

	mutex_lock(&dev_entry->file_mutex);
	list_del(&hpriv->debugfs_list);
	mutex_unlock(&dev_entry->file_mutex);
}

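/*
 * The add/remove helpers below maintain the per-object lists (command
 * buffers, command submissions, jobs, userptrs and context memory hashes)
 * that back the corresponding seq_file entries; each list is guarded by its
 * own spinlock or mutex.
 */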
void hl_debugfs_add_cb(struct hl_cb *cb)
{
	struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;

	spin_lock(&dev_entry->cb_spinlock);
	list_add(&cb->debugfs_list, &dev_entry->cb_list);
	spin_unlock(&dev_entry->cb_spinlock);
}

void hl_debugfs_remove_cb(struct hl_cb *cb)
{
	struct hl_dbg_device_entry *dev_entry = &cb->hdev->hl_debugfs;

	spin_lock(&dev_entry->cb_spinlock);
	list_del(&cb->debugfs_list);
	spin_unlock(&dev_entry->cb_spinlock);
}

void hl_debugfs_add_cs(struct hl_cs *cs)
{
	struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;

	spin_lock(&dev_entry->cs_spinlock);
	list_add(&cs->debugfs_list, &dev_entry->cs_list);
	spin_unlock(&dev_entry->cs_spinlock);
}

void hl_debugfs_remove_cs(struct hl_cs *cs)
{
	struct hl_dbg_device_entry *dev_entry = &cs->ctx->hdev->hl_debugfs;

	spin_lock(&dev_entry->cs_spinlock);
	list_del(&cs->debugfs_list);
	spin_unlock(&dev_entry->cs_spinlock);
}

void hl_debugfs_add_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	spin_lock(&dev_entry->cs_job_spinlock);
	list_add(&job->debugfs_list, &dev_entry->cs_job_list);
	spin_unlock(&dev_entry->cs_job_spinlock);
}

void hl_debugfs_remove_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	spin_lock(&dev_entry->cs_job_spinlock);
	list_del(&job->debugfs_list);
	spin_unlock(&dev_entry->cs_job_spinlock);
}

void hl_debugfs_add_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	spin_lock(&dev_entry->userptr_spinlock);
	list_add(&userptr->debugfs_list, &dev_entry->userptr_list);
	spin_unlock(&dev_entry->userptr_spinlock);
}

void hl_debugfs_remove_userptr(struct hl_device *hdev,
				struct hl_userptr *userptr)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	spin_lock(&dev_entry->userptr_spinlock);
	list_del(&userptr->debugfs_list);
	spin_unlock(&dev_entry->userptr_spinlock);
}

void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	mutex_lock(&dev_entry->ctx_mem_hash_mutex);
	list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
}

void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	mutex_lock(&dev_entry->ctx_mem_hash_mutex);
	list_del(&ctx->debugfs_list);
	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
}

/**
 * hl_debugfs_set_state_dump - register state dump making it accessible via
 * debugfs
 * @hdev: pointer to the device structure
 * @data: the actual dump data
 * @length: the length of the data
 */
void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
					unsigned long length)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	down_write(&dev_entry->state_dump_sem);

	dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %
					ARRAY_SIZE(dev_entry->state_dump);
	vfree(dev_entry->state_dump[dev_entry->state_dump_head]);
	dev_entry->state_dump[dev_entry->state_dump_head] = data;

	up_write(&dev_entry->state_dump_sem);
}