1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2022 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #define pr_fmt(fmt) "habanalabs: " fmt 9 10 #include <uapi/drm/habanalabs_accel.h> 11 #include "habanalabs.h" 12 13 #include <linux/pci.h> 14 #include <linux/hwmon.h> 15 #include <linux/vmalloc.h> 16 17 #include <drm/drm_accel.h> 18 #include <drm/drm_drv.h> 19 20 #include <trace/events/habanalabs.h> 21 22 #define HL_RESET_DELAY_USEC 10000 /* 10ms */ 23 24 #define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC 30 25 26 enum dma_alloc_type { 27 DMA_ALLOC_COHERENT, 28 DMA_ALLOC_POOL, 29 }; 30 31 #define MEM_SCRUB_DEFAULT_VAL 0x1122334455667788 32 33 /* 34 * hl_set_dram_bar- sets the bar to allow later access to address 35 * 36 * @hdev: pointer to habanalabs device structure. 37 * @addr: the address the caller wants to access. 38 * @region: the PCI region. 39 * @new_bar_region_base: the new BAR region base address. 40 * 41 * @return: the old BAR base address on success, U64_MAX for failure. 42 * The caller should set it back to the old address after use. 43 * 44 * In case the bar space does not cover the whole address space, 45 * the bar base address should be set to allow access to a given address. 46 * This function can be called also if the bar doesn't need to be set, 47 * in that case it just won't change the base. 48 */ 49 static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region, 50 u64 *new_bar_region_base) 51 { 52 struct asic_fixed_properties *prop = &hdev->asic_prop; 53 u64 bar_base_addr, old_base; 54 55 if (is_power_of_2(prop->dram_pci_bar_size)) 56 bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); 57 else 58 bar_base_addr = region->region_base + 59 div64_u64((addr - region->region_base), prop->dram_pci_bar_size) * 60 prop->dram_pci_bar_size; 61 62 old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); 63 64 /* in case of success we need to update the new BAR base */ 65 if ((old_base != U64_MAX) && new_bar_region_base) 66 *new_bar_region_base = bar_base_addr; 67 68 return old_base; 69 } 70 71 int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, 72 enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar) 73 { 74 struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; 75 u64 old_base = 0, rc, bar_region_base = region->region_base; 76 void __iomem *acc_addr; 77 78 if (set_dram_bar) { 79 old_base = hl_set_dram_bar(hdev, addr, region, &bar_region_base); 80 if (old_base == U64_MAX) 81 return -EIO; 82 } 83 84 acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar + 85 (addr - bar_region_base); 86 87 switch (acc_type) { 88 case DEBUGFS_READ8: 89 *val = readb(acc_addr); 90 break; 91 case DEBUGFS_WRITE8: 92 writeb(*val, acc_addr); 93 break; 94 case DEBUGFS_READ32: 95 *val = readl(acc_addr); 96 break; 97 case DEBUGFS_WRITE32: 98 writel(*val, acc_addr); 99 break; 100 case DEBUGFS_READ64: 101 *val = readq(acc_addr); 102 break; 103 case DEBUGFS_WRITE64: 104 writeq(*val, acc_addr); 105 break; 106 } 107 108 if (set_dram_bar) { 109 rc = hl_set_dram_bar(hdev, old_base, region, NULL); 110 if (rc == U64_MAX) 111 return -EIO; 112 } 113 114 return 0; 115 } 116 117 static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 118 gfp_t flag, enum dma_alloc_type alloc_type, 119 const char *caller) 120 { 121 void *ptr = NULL; 122 123 switch (alloc_type) { 124 case DMA_ALLOC_COHERENT: 125 ptr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, 
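	/*
	 * Typical caller pattern for hl_set_dram_bar() (illustrative sketch only,
	 * not code taken from the driver): save the returned old BAR base, access
	 * the address through the PCI BAR, then restore the old base, as the
	 * kernel-doc above requires. hl_access_sram_dram_region() follows the
	 * same pattern.
	 *
	 *	struct pci_mem_region *region = &hdev->pci_mem_region[PCI_REGION_DRAM];
	 *	u64 bar_base = region->region_base, old_base;
	 *	u32 data;
	 *
	 *	old_base = hl_set_dram_bar(hdev, addr, region, &bar_base);
	 *	if (old_base == U64_MAX)
	 *		return -EIO;
	 *
	 *	data = readl(hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
	 *			(addr - bar_base));
	 *
	 *	if (hl_set_dram_bar(hdev, old_base, region, NULL) == U64_MAX)
	 *		return -EIO;
	 */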
size, dma_handle, flag); 126 break; 127 case DMA_ALLOC_POOL: 128 ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, size, flag, dma_handle); 129 break; 130 } 131 132 if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) 133 trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, 134 caller); 135 136 return ptr; 137 } 138 139 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr, 140 dma_addr_t dma_handle, enum dma_alloc_type alloc_type, 141 const char *caller) 142 { 143 /* this is needed to avoid warning on using freed pointer */ 144 u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr; 145 146 switch (alloc_type) { 147 case DMA_ALLOC_COHERENT: 148 hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle); 149 break; 150 case DMA_ALLOC_POOL: 151 hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); 152 break; 153 } 154 155 trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller); 156 } 157 158 void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 159 gfp_t flag, const char *caller) 160 { 161 return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller); 162 } 163 164 void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, 165 dma_addr_t dma_handle, const char *caller) 166 { 167 hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); 168 } 169 170 void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, 171 dma_addr_t *dma_handle, const char *caller) 172 { 173 return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller); 174 } 175 176 void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, 177 const char *caller) 178 { 179 hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); 180 } 181 182 void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) 183 { 184 return hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, size, dma_handle); 185 } 186 187 void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 188 { 189 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, size, vaddr); 190 } 191 192 int hl_dma_map_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, 193 enum dma_data_direction dir, const char *caller) 194 { 195 struct asic_fixed_properties *prop = &hdev->asic_prop; 196 struct scatterlist *sg; 197 int rc, i; 198 199 rc = hdev->asic_funcs->dma_map_sgtable(hdev, sgt, dir); 200 if (rc) 201 return rc; 202 203 if (!trace_habanalabs_dma_map_page_enabled()) 204 return 0; 205 206 for_each_sgtable_dma_sg(sgt, sg, i) 207 trace_habanalabs_dma_map_page(hdev->dev, 208 page_to_phys(sg_page(sg)), 209 sg->dma_address - prop->device_dma_offset_for_host_access, 210 #ifdef CONFIG_NEED_SG_DMA_LENGTH 211 sg->dma_length, 212 #else 213 sg->length, 214 #endif 215 dir, caller); 216 217 return 0; 218 } 219 220 int hl_asic_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, 221 enum dma_data_direction dir) 222 { 223 struct asic_fixed_properties *prop = &hdev->asic_prop; 224 struct scatterlist *sg; 225 int rc, i; 226 227 rc = dma_map_sgtable(&hdev->pdev->dev, sgt, dir, 0); 228 if (rc) 229 return rc; 230 231 /* Shift to the device's base physical address of host memory if necessary */ 232 if (prop->device_dma_offset_for_host_access) 233 
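	/*
	 * Typical pairing of the caller-annotated DMA helpers above (illustrative
	 * sketch; in the driver these are presumably invoked through wrapper
	 * macros that pass __func__ as the caller string, which is an assumption
	 * here):
	 *
	 *	dma_addr_t dma_addr;
	 *	void *cpu_addr;
	 *
	 *	cpu_addr = hl_asic_dma_alloc_coherent_caller(hdev, size, &dma_addr,
	 *							GFP_KERNEL, __func__);
	 *	if (!cpu_addr)
	 *		return -ENOMEM;
	 *
	 *	// ... hand dma_addr to the device, use cpu_addr on the host ...
	 *
	 *	hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_addr, __func__);
	 */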
for_each_sgtable_dma_sg(sgt, sg, i) 234 sg->dma_address += prop->device_dma_offset_for_host_access; 235 236 return 0; 237 } 238 239 void hl_dma_unmap_sgtable_caller(struct hl_device *hdev, struct sg_table *sgt, 240 enum dma_data_direction dir, const char *caller) 241 { 242 struct asic_fixed_properties *prop = &hdev->asic_prop; 243 struct scatterlist *sg; 244 int i; 245 246 hdev->asic_funcs->dma_unmap_sgtable(hdev, sgt, dir); 247 248 if (trace_habanalabs_dma_unmap_page_enabled()) { 249 for_each_sgtable_dma_sg(sgt, sg, i) 250 trace_habanalabs_dma_unmap_page(hdev->dev, page_to_phys(sg_page(sg)), 251 sg->dma_address - prop->device_dma_offset_for_host_access, 252 #ifdef CONFIG_NEED_SG_DMA_LENGTH 253 sg->dma_length, 254 #else 255 sg->length, 256 #endif 257 dir, caller); 258 } 259 } 260 261 void hl_asic_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, 262 enum dma_data_direction dir) 263 { 264 struct asic_fixed_properties *prop = &hdev->asic_prop; 265 struct scatterlist *sg; 266 int i; 267 268 /* Cancel the device's base physical address of host memory if necessary */ 269 if (prop->device_dma_offset_for_host_access) 270 for_each_sgtable_dma_sg(sgt, sg, i) 271 sg->dma_address -= prop->device_dma_offset_for_host_access; 272 273 dma_unmap_sgtable(&hdev->pdev->dev, sgt, dir, 0); 274 } 275 276 /* 277 * hl_access_cfg_region - access the config region 278 * 279 * @hdev: pointer to habanalabs device structure 280 * @addr: the address to access 281 * @val: the value to write from or read to 282 * @acc_type: the type of access (read/write 64/32) 283 */ 284 int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val, 285 enum debugfs_access_type acc_type) 286 { 287 struct pci_mem_region *cfg_region = &hdev->pci_mem_region[PCI_REGION_CFG]; 288 u32 val_h, val_l; 289 290 if (!IS_ALIGNED(addr, sizeof(u32))) { 291 dev_err(hdev->dev, "address %#llx not a multiple of %zu\n", addr, sizeof(u32)); 292 return -EINVAL; 293 } 294 295 switch (acc_type) { 296 case DEBUGFS_READ32: 297 *val = RREG32(addr - cfg_region->region_base); 298 break; 299 case DEBUGFS_WRITE32: 300 WREG32(addr - cfg_region->region_base, *val); 301 break; 302 case DEBUGFS_READ64: 303 val_l = RREG32(addr - cfg_region->region_base); 304 val_h = RREG32(addr + sizeof(u32) - cfg_region->region_base); 305 306 *val = (((u64) val_h) << 32) | val_l; 307 break; 308 case DEBUGFS_WRITE64: 309 WREG32(addr - cfg_region->region_base, lower_32_bits(*val)); 310 WREG32(addr + sizeof(u32) - cfg_region->region_base, upper_32_bits(*val)); 311 break; 312 default: 313 dev_err(hdev->dev, "access type %d is not supported\n", acc_type); 314 return -EOPNOTSUPP; 315 } 316 317 return 0; 318 } 319 320 /* 321 * hl_access_dev_mem - access device memory 322 * 323 * @hdev: pointer to habanalabs device structure 324 * @region_type: the type of the region the address belongs to 325 * @addr: the address to access 326 * @val: the value to write from or read to 327 * @acc_type: the type of access (r/w, 32/64) 328 */ 329 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type, 330 u64 addr, u64 *val, enum debugfs_access_type acc_type) 331 { 332 switch (region_type) { 333 case PCI_REGION_CFG: 334 return hl_access_cfg_region(hdev, addr, val, acc_type); 335 case PCI_REGION_SRAM: 336 case PCI_REGION_DRAM: 337 return hl_access_sram_dram_region(hdev, addr, val, acc_type, 338 region_type, (region_type == PCI_REGION_DRAM)); 339 default: 340 return -EFAULT; 341 } 342 343 return 0; 344 } 345 346 void hl_engine_data_sprintf(struct engines_data *e, const char 
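	/*
	 * Debugfs-style access through hl_access_dev_mem() (illustrative sketch,
	 * assuming 'addr' falls inside the CFG region):
	 *
	 *	u64 val = 0;
	 *	int rc;
	 *
	 *	rc = hl_access_dev_mem(hdev, PCI_REGION_CFG, addr, &val, DEBUGFS_READ32);
	 *	if (rc)
	 *		return rc;
	 *	// the lower 32 bits of 'val' now hold the register value
	 *
	 *	val = new_val;
	 *	rc = hl_access_dev_mem(hdev, PCI_REGION_CFG, addr, &val, DEBUGFS_WRITE32);
	 */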
*fmt, ...) 347 { 348 va_list args; 349 int str_size; 350 351 va_start(args, fmt); 352 /* Calculate formatted string length. Assuming each string is null terminated, hence 353 * increment result by 1 354 */ 355 str_size = vsnprintf(NULL, 0, fmt, args) + 1; 356 va_end(args); 357 358 if ((e->actual_size + str_size) < e->allocated_buf_size) { 359 va_start(args, fmt); 360 vsnprintf(e->buf + e->actual_size, str_size, fmt, args); 361 va_end(args); 362 } 363 364 /* Need to update the size even when not updating destination buffer to get the exact size 365 * of all input strings 366 */ 367 e->actual_size += str_size; 368 } 369 370 enum hl_device_status hl_device_status(struct hl_device *hdev) 371 { 372 enum hl_device_status status; 373 374 if (hdev->device_fini_pending) { 375 status = HL_DEVICE_STATUS_MALFUNCTION; 376 } else if (hdev->reset_info.in_reset) { 377 if (hdev->reset_info.in_compute_reset) 378 status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE; 379 else 380 status = HL_DEVICE_STATUS_IN_RESET; 381 } else if (hdev->reset_info.needs_reset) { 382 status = HL_DEVICE_STATUS_NEEDS_RESET; 383 } else if (hdev->disabled) { 384 status = HL_DEVICE_STATUS_MALFUNCTION; 385 } else if (!hdev->init_done) { 386 status = HL_DEVICE_STATUS_IN_DEVICE_CREATION; 387 } else { 388 status = HL_DEVICE_STATUS_OPERATIONAL; 389 } 390 391 return status; 392 } 393 394 bool hl_device_operational(struct hl_device *hdev, 395 enum hl_device_status *status) 396 { 397 enum hl_device_status current_status; 398 399 current_status = hl_device_status(hdev); 400 if (status) 401 *status = current_status; 402 403 switch (current_status) { 404 case HL_DEVICE_STATUS_MALFUNCTION: 405 case HL_DEVICE_STATUS_IN_RESET: 406 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: 407 case HL_DEVICE_STATUS_NEEDS_RESET: 408 return false; 409 case HL_DEVICE_STATUS_OPERATIONAL: 410 case HL_DEVICE_STATUS_IN_DEVICE_CREATION: 411 default: 412 return true; 413 } 414 } 415 416 bool hl_ctrl_device_operational(struct hl_device *hdev, 417 enum hl_device_status *status) 418 { 419 enum hl_device_status current_status; 420 421 current_status = hl_device_status(hdev); 422 if (status) 423 *status = current_status; 424 425 switch (current_status) { 426 case HL_DEVICE_STATUS_MALFUNCTION: 427 return false; 428 case HL_DEVICE_STATUS_IN_RESET: 429 case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: 430 case HL_DEVICE_STATUS_NEEDS_RESET: 431 case HL_DEVICE_STATUS_OPERATIONAL: 432 case HL_DEVICE_STATUS_IN_DEVICE_CREATION: 433 default: 434 return true; 435 } 436 } 437 438 static void print_idle_status_mask(struct hl_device *hdev, const char *message, 439 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE]) 440 { 441 if (idle_mask[3]) 442 dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx_%016llx)\n", 443 message, idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]); 444 else if (idle_mask[2]) 445 dev_err(hdev->dev, "%s (mask %#llx_%016llx_%016llx)\n", 446 message, idle_mask[2], idle_mask[1], idle_mask[0]); 447 else if (idle_mask[1]) 448 dev_err(hdev->dev, "%s (mask %#llx_%016llx)\n", 449 message, idle_mask[1], idle_mask[0]); 450 else 451 dev_err(hdev->dev, "%s (mask %#llx)\n", message, idle_mask[0]); 452 } 453 454 static void hpriv_release(struct kref *ref) 455 { 456 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; 457 bool reset_device, device_is_idle = true; 458 struct hl_fpriv *hpriv; 459 struct hl_device *hdev; 460 461 hpriv = container_of(ref, struct hl_fpriv, refcount); 462 463 hdev = hpriv->hdev; 464 465 hdev->asic_funcs->send_device_activity(hdev, false); 
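	/*
	 * hl_device_operational() is the usual gate for user-facing entry points
	 * (illustrative sketch of a caller; the exact call sites vary):
	 *
	 *	enum hl_device_status status;
	 *
	 *	if (!hl_device_operational(hdev, &status)) {
	 *		dev_dbg_ratelimited(hdev->dev,
	 *			"device is not operational (status %d), rejecting request\n",
	 *			status);
	 *		return -EBUSY;
	 *	}
	 */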

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->ctx_lock);
	mutex_destroy(&hpriv->restore_phase_mutex);

	/* There should be no memory buffers at this point and handles IDR can be destroyed */
	hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);

	/* Device should be reset if reset-upon-device-release is enabled, or if there is a pending
	 * reset that waits for device release.
	 */
	reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active;

	/* Check the device idle status and reset if not idle.
	 * Skip it if already in reset, or if device is going to be reset in any case.
	 */
	if (!hdev->reset_info.in_reset && !reset_device && !hdev->pldm)
		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
	if (!device_is_idle) {
		print_idle_status_mask(hdev, "device is not idle after user context is closed",
					idle_mask);
		reset_device = true;
	}

	/* We need to remove the user from the list to make sure the reset process won't
	 * try to kill the user process. Because, if we got here, it means there are no
	 * more driver/device resources that the user process is occupying, so there is
	 * no need to kill it.
	 *
	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
	 * a race between the release and opening the device again. We don't want to let
	 * a user open the device while a reset is about to happen.
	 */
	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);

	put_pid(hpriv->taskpid);

	if (reset_device) {
		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
	} else {
		/* Scrubbing is handled within hl_device_reset(), so here we need to do it directly */
		int rc = hdev->asic_funcs->scrub_device_mem(hdev);

		if (rc) {
			dev_err(hdev->dev, "failed to scrub memory from hpriv release (%d)\n", rc);
			hl_device_reset(hdev, HL_DRV_RESET_HARD);
		}
	}

	/* Now we can mark the compute_ctx as not active. Even if a reset is running in a different
	 * thread, we don't care, because in_reset is marked, so if a user tries to open
	 * the device it will fail on that, even if compute_ctx is false.
522 */ 523 mutex_lock(&hdev->fpriv_list_lock); 524 hdev->is_compute_ctx_active = false; 525 mutex_unlock(&hdev->fpriv_list_lock); 526 527 hdev->compute_ctx_in_release = 0; 528 529 /* release the eventfd */ 530 if (hpriv->notifier_event.eventfd) 531 eventfd_ctx_put(hpriv->notifier_event.eventfd); 532 533 mutex_destroy(&hpriv->notifier_event.lock); 534 535 kfree(hpriv); 536 } 537 538 void hl_hpriv_get(struct hl_fpriv *hpriv) 539 { 540 kref_get(&hpriv->refcount); 541 } 542 543 int hl_hpriv_put(struct hl_fpriv *hpriv) 544 { 545 return kref_put(&hpriv->refcount, hpriv_release); 546 } 547 548 static void print_device_in_use_info(struct hl_device *hdev, const char *message) 549 { 550 u32 active_cs_num, dmabuf_export_cnt; 551 bool unknown_reason = true; 552 char buf[128]; 553 size_t size; 554 int offset; 555 556 size = sizeof(buf); 557 offset = 0; 558 559 active_cs_num = hl_get_active_cs_num(hdev); 560 if (active_cs_num) { 561 unknown_reason = false; 562 offset += scnprintf(buf + offset, size - offset, " [%u active CS]", active_cs_num); 563 } 564 565 dmabuf_export_cnt = atomic_read(&hdev->dmabuf_export_cnt); 566 if (dmabuf_export_cnt) { 567 unknown_reason = false; 568 offset += scnprintf(buf + offset, size - offset, " [%u exported dma-buf]", 569 dmabuf_export_cnt); 570 } 571 572 if (unknown_reason) 573 scnprintf(buf + offset, size - offset, " [unknown reason]"); 574 575 dev_notice(hdev->dev, "%s%s\n", message, buf); 576 } 577 578 /* 579 * hl_device_release() - release function for habanalabs device. 580 * @ddev: pointer to DRM device structure. 581 * @file: pointer to DRM file private data structure. 582 * 583 * Called when process closes an habanalabs device 584 */ 585 void hl_device_release(struct drm_device *ddev, struct drm_file *file_priv) 586 { 587 struct hl_fpriv *hpriv = file_priv->driver_priv; 588 struct hl_device *hdev = to_hl_device(ddev); 589 590 if (!hdev) { 591 pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); 592 put_pid(hpriv->taskpid); 593 } 594 595 hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr); 596 597 /* Memory buffers might be still in use at this point and thus the handles IDR destruction 598 * is postponed to hpriv_release(). 599 */ 600 hl_mem_mgr_fini(&hpriv->mem_mgr); 601 602 hdev->compute_ctx_in_release = 1; 603 604 if (!hl_hpriv_put(hpriv)) { 605 print_device_in_use_info(hdev, "User process closed FD but device still in use"); 606 hl_device_reset(hdev, HL_DRV_RESET_HARD); 607 } 608 609 hdev->last_open_session_duration_jif = jiffies - hdev->last_successful_open_jif; 610 } 611 612 static int hl_device_release_ctrl(struct inode *inode, struct file *filp) 613 { 614 struct hl_fpriv *hpriv = filp->private_data; 615 struct hl_device *hdev = hpriv->hdev; 616 617 filp->private_data = NULL; 618 619 if (!hdev) { 620 pr_err("Closing FD after device was removed\n"); 621 goto out; 622 } 623 624 mutex_lock(&hdev->fpriv_ctrl_list_lock); 625 list_del(&hpriv->dev_node); 626 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 627 out: 628 put_pid(hpriv->taskpid); 629 630 kfree(hpriv); 631 632 return 0; 633 } 634 635 static int __hl_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma) 636 { 637 struct hl_device *hdev = hpriv->hdev; 638 unsigned long vm_pgoff; 639 640 if (!hdev) { 641 pr_err_ratelimited("Trying to mmap after device was removed! 
Please close FD\n"); 642 return -ENODEV; 643 } 644 645 vm_pgoff = vma->vm_pgoff; 646 647 switch (vm_pgoff & HL_MMAP_TYPE_MASK) { 648 case HL_MMAP_TYPE_BLOCK: 649 vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); 650 return hl_hw_block_mmap(hpriv, vma); 651 652 case HL_MMAP_TYPE_CB: 653 case HL_MMAP_TYPE_TS_BUFF: 654 return hl_mem_mgr_mmap(&hpriv->mem_mgr, vma, NULL); 655 } 656 return -EINVAL; 657 } 658 659 /* 660 * hl_mmap - mmap function for habanalabs device 661 * 662 * @*filp: pointer to file structure 663 * @*vma: pointer to vm_area_struct of the process 664 * 665 * Called when process does an mmap on habanalabs device. Call the relevant mmap 666 * function at the end of the common code. 667 */ 668 int hl_mmap(struct file *filp, struct vm_area_struct *vma) 669 { 670 struct drm_file *file_priv = filp->private_data; 671 struct hl_fpriv *hpriv = file_priv->driver_priv; 672 673 return __hl_mmap(hpriv, vma); 674 } 675 676 static const struct file_operations hl_ctrl_ops = { 677 .owner = THIS_MODULE, 678 .open = hl_device_open_ctrl, 679 .release = hl_device_release_ctrl, 680 .unlocked_ioctl = hl_ioctl_control, 681 .compat_ioctl = hl_ioctl_control 682 }; 683 684 static void device_release_func(struct device *dev) 685 { 686 kfree(dev); 687 } 688 689 /* 690 * device_init_cdev - Initialize cdev and device for habanalabs device 691 * 692 * @hdev: pointer to habanalabs device structure 693 * @class: pointer to the class object of the device 694 * @minor: minor number of the specific device 695 * @fops: file operations to install for this device 696 * @name: name of the device as it will appear in the filesystem 697 * @cdev: pointer to the char device object that will be initialized 698 * @dev: pointer to the device object that will be initialized 699 * 700 * Initialize a cdev and a Linux device for habanalabs's device. 
701 */ 702 static int device_init_cdev(struct hl_device *hdev, const struct class *class, 703 int minor, const struct file_operations *fops, 704 char *name, struct cdev *cdev, 705 struct device **dev) 706 { 707 cdev_init(cdev, fops); 708 cdev->owner = THIS_MODULE; 709 710 *dev = kzalloc(sizeof(**dev), GFP_KERNEL); 711 if (!*dev) 712 return -ENOMEM; 713 714 device_initialize(*dev); 715 (*dev)->devt = MKDEV(hdev->major, minor); 716 (*dev)->class = class; 717 (*dev)->release = device_release_func; 718 dev_set_drvdata(*dev, hdev); 719 dev_set_name(*dev, "%s", name); 720 721 return 0; 722 } 723 724 static int cdev_sysfs_debugfs_add(struct hl_device *hdev) 725 { 726 const struct class *accel_class = hdev->drm.accel->kdev->class; 727 char name[32]; 728 int rc; 729 730 hdev->cdev_idx = hdev->drm.accel->index; 731 732 /* Initialize cdev and device structures for the control device */ 733 snprintf(name, sizeof(name), "accel_controlD%d", hdev->cdev_idx); 734 rc = device_init_cdev(hdev, accel_class, hdev->cdev_idx, &hl_ctrl_ops, name, 735 &hdev->cdev_ctrl, &hdev->dev_ctrl); 736 if (rc) 737 return rc; 738 739 rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl); 740 if (rc) { 741 dev_err(hdev->dev_ctrl, 742 "failed to add an accel control char device to the system\n"); 743 goto free_ctrl_device; 744 } 745 746 rc = hl_sysfs_init(hdev); 747 if (rc) { 748 dev_err(hdev->dev, "failed to initialize sysfs\n"); 749 goto delete_ctrl_cdev_device; 750 } 751 752 hl_debugfs_add_device(hdev); 753 754 hdev->cdev_sysfs_debugfs_created = true; 755 756 return 0; 757 758 delete_ctrl_cdev_device: 759 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); 760 free_ctrl_device: 761 put_device(hdev->dev_ctrl); 762 return rc; 763 } 764 765 static void cdev_sysfs_debugfs_remove(struct hl_device *hdev) 766 { 767 if (!hdev->cdev_sysfs_debugfs_created) 768 return; 769 770 hl_sysfs_fini(hdev); 771 772 cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl); 773 put_device(hdev->dev_ctrl); 774 } 775 776 static void device_hard_reset_pending(struct work_struct *work) 777 { 778 struct hl_device_reset_work *device_reset_work = 779 container_of(work, struct hl_device_reset_work, reset_work.work); 780 struct hl_device *hdev = device_reset_work->hdev; 781 u32 flags; 782 int rc; 783 784 flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR; 785 786 rc = hl_device_reset(hdev, flags); 787 788 if ((rc == -EBUSY) && !hdev->device_fini_pending) { 789 struct hl_ctx *ctx = hl_get_compute_ctx(hdev); 790 791 if (ctx) { 792 /* The read refcount value should subtracted by one, because the read is 793 * protected with hl_get_compute_ctx(). 794 */ 795 dev_info(hdev->dev, 796 "Could not reset device (compute_ctx refcount %u). will try again in %u seconds", 797 kref_read(&ctx->refcount) - 1, HL_PENDING_RESET_PER_SEC); 798 hl_ctx_put(ctx); 799 } else { 800 dev_info(hdev->dev, "Could not reset device. will try again in %u seconds", 801 HL_PENDING_RESET_PER_SEC); 802 } 803 804 queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work, 805 msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000)); 806 } 807 } 808 809 static void device_release_watchdog_func(struct work_struct *work) 810 { 811 struct hl_device_reset_work *watchdog_work = 812 container_of(work, struct hl_device_reset_work, reset_work.work); 813 struct hl_device *hdev = watchdog_work->hdev; 814 u32 flags; 815 816 dev_dbg(hdev->dev, "Device wasn't released in time. 
Initiate hard-reset.\n"); 817 818 flags = watchdog_work->flags | HL_DRV_RESET_HARD | HL_DRV_RESET_FROM_WD_THR; 819 820 hl_device_reset(hdev, flags); 821 } 822 823 /* 824 * device_early_init - do some early initialization for the habanalabs device 825 * 826 * @hdev: pointer to habanalabs device structure 827 * 828 * Install the relevant function pointers and call the early_init function, 829 * if such a function exists 830 */ 831 static int device_early_init(struct hl_device *hdev) 832 { 833 int i, rc; 834 char workq_name[32]; 835 836 switch (hdev->asic_type) { 837 case ASIC_GOYA: 838 goya_set_asic_funcs(hdev); 839 strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); 840 break; 841 case ASIC_GAUDI: 842 gaudi_set_asic_funcs(hdev); 843 strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name)); 844 break; 845 case ASIC_GAUDI_SEC: 846 gaudi_set_asic_funcs(hdev); 847 strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name)); 848 break; 849 case ASIC_GAUDI2: 850 gaudi2_set_asic_funcs(hdev); 851 strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name)); 852 break; 853 case ASIC_GAUDI2B: 854 gaudi2_set_asic_funcs(hdev); 855 strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name)); 856 break; 857 case ASIC_GAUDI2C: 858 gaudi2_set_asic_funcs(hdev); 859 strscpy(hdev->asic_name, "GAUDI2C", sizeof(hdev->asic_name)); 860 break; 861 default: 862 dev_err(hdev->dev, "Unrecognized ASIC type %d\n", 863 hdev->asic_type); 864 return -EINVAL; 865 } 866 867 rc = hdev->asic_funcs->early_init(hdev); 868 if (rc) 869 return rc; 870 871 rc = hl_asid_init(hdev); 872 if (rc) 873 goto early_fini; 874 875 if (hdev->asic_prop.completion_queues_count) { 876 hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, 877 sizeof(struct workqueue_struct *), 878 GFP_KERNEL); 879 if (!hdev->cq_wq) { 880 rc = -ENOMEM; 881 goto asid_fini; 882 } 883 } 884 885 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) { 886 snprintf(workq_name, 32, "hl%u-free-jobs-%u", hdev->cdev_idx, (u32) i); 887 hdev->cq_wq[i] = create_singlethread_workqueue(workq_name); 888 if (hdev->cq_wq[i] == NULL) { 889 dev_err(hdev->dev, "Failed to allocate CQ workqueue\n"); 890 rc = -ENOMEM; 891 goto free_cq_wq; 892 } 893 } 894 895 snprintf(workq_name, 32, "hl%u-events", hdev->cdev_idx); 896 hdev->eq_wq = create_singlethread_workqueue(workq_name); 897 if (hdev->eq_wq == NULL) { 898 dev_err(hdev->dev, "Failed to allocate EQ workqueue\n"); 899 rc = -ENOMEM; 900 goto free_cq_wq; 901 } 902 903 snprintf(workq_name, 32, "hl%u-cs-completions", hdev->cdev_idx); 904 hdev->cs_cmplt_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); 905 if (!hdev->cs_cmplt_wq) { 906 dev_err(hdev->dev, 907 "Failed to allocate CS completions workqueue\n"); 908 rc = -ENOMEM; 909 goto free_eq_wq; 910 } 911 912 snprintf(workq_name, 32, "hl%u-ts-free-obj", hdev->cdev_idx); 913 hdev->ts_free_obj_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); 914 if (!hdev->ts_free_obj_wq) { 915 dev_err(hdev->dev, 916 "Failed to allocate Timestamp registration free workqueue\n"); 917 rc = -ENOMEM; 918 goto free_cs_cmplt_wq; 919 } 920 921 snprintf(workq_name, 32, "hl%u-prefetch", hdev->cdev_idx); 922 hdev->prefetch_wq = alloc_workqueue(workq_name, WQ_UNBOUND, 0); 923 if (!hdev->prefetch_wq) { 924 dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); 925 rc = -ENOMEM; 926 goto free_ts_free_wq; 927 } 928 929 hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); 930 if (!hdev->hl_chip_info) { 931 rc = -ENOMEM; 932 goto free_prefetch_wq; 933 } 934 935 rc 
= hl_mmu_if_set_funcs(hdev); 936 if (rc) 937 goto free_chip_info; 938 939 hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr); 940 941 snprintf(workq_name, 32, "hl%u_device_reset", hdev->cdev_idx); 942 hdev->reset_wq = create_singlethread_workqueue(workq_name); 943 if (!hdev->reset_wq) { 944 rc = -ENOMEM; 945 dev_err(hdev->dev, "Failed to create device reset WQ\n"); 946 goto free_cb_mgr; 947 } 948 949 INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending); 950 hdev->device_reset_work.hdev = hdev; 951 hdev->device_fini_pending = 0; 952 953 INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work, 954 device_release_watchdog_func); 955 hdev->device_release_watchdog_work.hdev = hdev; 956 957 mutex_init(&hdev->send_cpu_message_lock); 958 mutex_init(&hdev->debug_lock); 959 INIT_LIST_HEAD(&hdev->cs_mirror_list); 960 spin_lock_init(&hdev->cs_mirror_lock); 961 spin_lock_init(&hdev->reset_info.lock); 962 INIT_LIST_HEAD(&hdev->fpriv_list); 963 INIT_LIST_HEAD(&hdev->fpriv_ctrl_list); 964 mutex_init(&hdev->fpriv_list_lock); 965 mutex_init(&hdev->fpriv_ctrl_list_lock); 966 mutex_init(&hdev->clk_throttling.lock); 967 968 return 0; 969 970 free_cb_mgr: 971 hl_mem_mgr_fini(&hdev->kernel_mem_mgr); 972 hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr); 973 free_chip_info: 974 kfree(hdev->hl_chip_info); 975 free_prefetch_wq: 976 destroy_workqueue(hdev->prefetch_wq); 977 free_ts_free_wq: 978 destroy_workqueue(hdev->ts_free_obj_wq); 979 free_cs_cmplt_wq: 980 destroy_workqueue(hdev->cs_cmplt_wq); 981 free_eq_wq: 982 destroy_workqueue(hdev->eq_wq); 983 free_cq_wq: 984 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 985 if (hdev->cq_wq[i]) 986 destroy_workqueue(hdev->cq_wq[i]); 987 kfree(hdev->cq_wq); 988 asid_fini: 989 hl_asid_fini(hdev); 990 early_fini: 991 if (hdev->asic_funcs->early_fini) 992 hdev->asic_funcs->early_fini(hdev); 993 994 return rc; 995 } 996 997 /* 998 * device_early_fini - finalize all that was done in device_early_init 999 * 1000 * @hdev: pointer to habanalabs device structure 1001 * 1002 */ 1003 static void device_early_fini(struct hl_device *hdev) 1004 { 1005 int i; 1006 1007 mutex_destroy(&hdev->debug_lock); 1008 mutex_destroy(&hdev->send_cpu_message_lock); 1009 1010 mutex_destroy(&hdev->fpriv_list_lock); 1011 mutex_destroy(&hdev->fpriv_ctrl_list_lock); 1012 1013 mutex_destroy(&hdev->clk_throttling.lock); 1014 1015 hl_mem_mgr_fini(&hdev->kernel_mem_mgr); 1016 hl_mem_mgr_idr_destroy(&hdev->kernel_mem_mgr); 1017 1018 kfree(hdev->hl_chip_info); 1019 1020 destroy_workqueue(hdev->prefetch_wq); 1021 destroy_workqueue(hdev->ts_free_obj_wq); 1022 destroy_workqueue(hdev->cs_cmplt_wq); 1023 destroy_workqueue(hdev->eq_wq); 1024 destroy_workqueue(hdev->reset_wq); 1025 1026 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1027 destroy_workqueue(hdev->cq_wq[i]); 1028 kfree(hdev->cq_wq); 1029 1030 hl_asid_fini(hdev); 1031 1032 if (hdev->asic_funcs->early_fini) 1033 hdev->asic_funcs->early_fini(hdev); 1034 } 1035 1036 static bool is_pci_link_healthy(struct hl_device *hdev) 1037 { 1038 u16 device_id; 1039 1040 if (!hdev->pdev) 1041 return false; 1042 1043 pci_read_config_word(hdev->pdev, PCI_DEVICE_ID, &device_id); 1044 1045 return (device_id == hdev->pdev->device); 1046 } 1047 1048 static int hl_device_eq_heartbeat_check(struct hl_device *hdev) 1049 { 1050 struct asic_fixed_properties *prop = &hdev->asic_prop; 1051 1052 if (!prop->cpucp_info.eq_health_check_supported) 1053 return 0; 1054 1055 if (hdev->eq_heartbeat_received) { 1056 
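		/*
		 * This flag implements a simple handshake with the event-queue path:
		 * the EQ handler is expected to set eq_heartbeat_received when the
		 * F/W heartbeat event arrives, and this check consumes it once per
		 * heartbeat period. Illustrative sketch of the producer side (the
		 * actual handler and event id are ASIC-specific and not shown in
		 * this file):
		 *
		 *	if (event_is_fw_heartbeat)	// hypothetical condition
		 *		hdev->eq_heartbeat_received = true;
		 */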
hdev->eq_heartbeat_received = false; 1057 } else { 1058 dev_err(hdev->dev, "EQ heartbeat event was not received!\n"); 1059 return -EIO; 1060 } 1061 1062 return 0; 1063 } 1064 1065 static void hl_device_heartbeat(struct work_struct *work) 1066 { 1067 struct hl_device *hdev = container_of(work, struct hl_device, 1068 work_heartbeat.work); 1069 struct hl_info_fw_err_info info = {0}; 1070 u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 1071 1072 /* Start heartbeat checks only after driver has enabled events from FW */ 1073 if (!hl_device_operational(hdev, NULL) || !hdev->init_done) 1074 goto reschedule; 1075 1076 /* 1077 * For EQ health check need to check if driver received the heartbeat eq event 1078 * in order to validate the eq is working. 1079 * Only if both the EQ is healthy and we managed to send the next heartbeat reschedule. 1080 */ 1081 if ((!hl_device_eq_heartbeat_check(hdev)) && (!hdev->asic_funcs->send_heartbeat(hdev))) 1082 goto reschedule; 1083 1084 if (hl_device_operational(hdev, NULL)) 1085 dev_err(hdev->dev, "Device heartbeat failed! PCI link is %s\n", 1086 is_pci_link_healthy(hdev) ? "healthy" : "broken"); 1087 1088 info.err_type = HL_INFO_FW_HEARTBEAT_ERR; 1089 info.event_mask = &event_mask; 1090 hl_handle_fw_err(hdev, &info); 1091 hl_device_cond_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT, event_mask); 1092 1093 return; 1094 1095 reschedule: 1096 /* 1097 * prev_reset_trigger tracks consecutive fatal h/w errors until first 1098 * heartbeat immediately post reset. 1099 * If control reached here, then at least one heartbeat work has been 1100 * scheduled since last reset/init cycle. 1101 * So if the device is not already in reset cycle, reset the flag 1102 * prev_reset_trigger as no reset occurred with HL_DRV_RESET_FW_FATAL_ERR 1103 * status for at least one heartbeat. From this point driver restarts 1104 * tracking future consecutive fatal errors. 1105 */ 1106 if (!hdev->reset_info.in_reset) 1107 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 1108 1109 schedule_delayed_work(&hdev->work_heartbeat, 1110 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 1111 } 1112 1113 /* 1114 * device_late_init - do late stuff initialization for the habanalabs device 1115 * 1116 * @hdev: pointer to habanalabs device structure 1117 * 1118 * Do stuff that either needs the device H/W queues to be active or needs 1119 * to happen after all the rest of the initialization is finished 1120 */ 1121 static int device_late_init(struct hl_device *hdev) 1122 { 1123 int rc; 1124 1125 if (hdev->asic_funcs->late_init) { 1126 rc = hdev->asic_funcs->late_init(hdev); 1127 if (rc) { 1128 dev_err(hdev->dev, 1129 "failed late initialization for the H/W\n"); 1130 return rc; 1131 } 1132 } 1133 1134 hdev->high_pll = hdev->asic_prop.high_pll; 1135 1136 if (hdev->heartbeat) { 1137 /* 1138 * Before scheduling the heartbeat driver will check if eq event has received. 1139 * for the first schedule we need to set the indication as true then for the next 1140 * one this indication will be true only if eq event was sent by FW. 
1141 */ 1142 hdev->eq_heartbeat_received = true; 1143 1144 INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat); 1145 1146 schedule_delayed_work(&hdev->work_heartbeat, 1147 usecs_to_jiffies(HL_HEARTBEAT_PER_USEC)); 1148 } 1149 1150 hdev->late_init_done = true; 1151 1152 return 0; 1153 } 1154 1155 /* 1156 * device_late_fini - finalize all that was done in device_late_init 1157 * 1158 * @hdev: pointer to habanalabs device structure 1159 * 1160 */ 1161 static void device_late_fini(struct hl_device *hdev) 1162 { 1163 if (!hdev->late_init_done) 1164 return; 1165 1166 if (hdev->heartbeat) 1167 cancel_delayed_work_sync(&hdev->work_heartbeat); 1168 1169 if (hdev->asic_funcs->late_fini) 1170 hdev->asic_funcs->late_fini(hdev); 1171 1172 hdev->late_init_done = false; 1173 } 1174 1175 int hl_device_utilization(struct hl_device *hdev, u32 *utilization) 1176 { 1177 u64 max_power, curr_power, dc_power, dividend, divisor; 1178 int rc; 1179 1180 max_power = hdev->max_power; 1181 dc_power = hdev->asic_prop.dc_power_default; 1182 divisor = max_power - dc_power; 1183 if (!divisor) { 1184 dev_warn(hdev->dev, "device utilization is not supported\n"); 1185 return -EOPNOTSUPP; 1186 } 1187 rc = hl_fw_cpucp_power_get(hdev, &curr_power); 1188 1189 if (rc) 1190 return rc; 1191 1192 curr_power = clamp(curr_power, dc_power, max_power); 1193 1194 dividend = (curr_power - dc_power) * 100; 1195 *utilization = (u32) div_u64(dividend, divisor); 1196 1197 return 0; 1198 } 1199 1200 int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable) 1201 { 1202 int rc = 0; 1203 1204 mutex_lock(&hdev->debug_lock); 1205 1206 if (!enable) { 1207 if (!hdev->in_debug) { 1208 dev_err(hdev->dev, 1209 "Failed to disable debug mode because device was not in debug mode\n"); 1210 rc = -EFAULT; 1211 goto out; 1212 } 1213 1214 if (!hdev->reset_info.hard_reset_pending) 1215 hdev->asic_funcs->halt_coresight(hdev, ctx); 1216 1217 hdev->in_debug = 0; 1218 1219 goto out; 1220 } 1221 1222 if (hdev->in_debug) { 1223 dev_err(hdev->dev, 1224 "Failed to enable debug mode because device is already in debug mode\n"); 1225 rc = -EFAULT; 1226 goto out; 1227 } 1228 1229 hdev->in_debug = 1; 1230 1231 out: 1232 mutex_unlock(&hdev->debug_lock); 1233 1234 return rc; 1235 } 1236 1237 static void take_release_locks(struct hl_device *hdev) 1238 { 1239 /* Flush anyone that is inside the critical section of enqueue 1240 * jobs to the H/W 1241 */ 1242 hdev->asic_funcs->hw_queues_lock(hdev); 1243 hdev->asic_funcs->hw_queues_unlock(hdev); 1244 1245 /* Flush processes that are sending message to CPU */ 1246 mutex_lock(&hdev->send_cpu_message_lock); 1247 mutex_unlock(&hdev->send_cpu_message_lock); 1248 1249 /* Flush anyone that is inside device open */ 1250 mutex_lock(&hdev->fpriv_list_lock); 1251 mutex_unlock(&hdev->fpriv_list_lock); 1252 mutex_lock(&hdev->fpriv_ctrl_list_lock); 1253 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 1254 } 1255 1256 static void hl_abort_waiting_for_completions(struct hl_device *hdev) 1257 { 1258 hl_abort_waiting_for_cs_completions(hdev); 1259 1260 /* Release all pending user interrupts, each pending user interrupt 1261 * holds a reference to a user context. 
1262 */ 1263 hl_release_pending_user_interrupts(hdev); 1264 } 1265 1266 static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset, 1267 bool skip_wq_flush) 1268 { 1269 if (hard_reset) 1270 device_late_fini(hdev); 1271 1272 /* 1273 * Halt the engines and disable interrupts so we won't get any more 1274 * completions from H/W and we won't have any accesses from the 1275 * H/W to the host machine 1276 */ 1277 hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset); 1278 1279 /* Go over all the queues, release all CS and their jobs */ 1280 hl_cs_rollback_all(hdev, skip_wq_flush); 1281 1282 /* flush the MMU prefetch workqueue */ 1283 flush_workqueue(hdev->prefetch_wq); 1284 1285 hl_abort_waiting_for_completions(hdev); 1286 } 1287 1288 /* 1289 * hl_device_suspend - initiate device suspend 1290 * 1291 * @hdev: pointer to habanalabs device structure 1292 * 1293 * Puts the hw in the suspend state (all asics). 1294 * Returns 0 for success or an error on failure. 1295 * Called at driver suspend. 1296 */ 1297 int hl_device_suspend(struct hl_device *hdev) 1298 { 1299 int rc; 1300 1301 pci_save_state(hdev->pdev); 1302 1303 /* Block future CS/VM/JOB completion operations */ 1304 spin_lock(&hdev->reset_info.lock); 1305 if (hdev->reset_info.in_reset) { 1306 spin_unlock(&hdev->reset_info.lock); 1307 dev_err(hdev->dev, "Can't suspend while in reset\n"); 1308 return -EIO; 1309 } 1310 hdev->reset_info.in_reset = 1; 1311 spin_unlock(&hdev->reset_info.lock); 1312 1313 /* This blocks all other stuff that is not blocked by in_reset */ 1314 hdev->disabled = true; 1315 1316 take_release_locks(hdev); 1317 1318 rc = hdev->asic_funcs->suspend(hdev); 1319 if (rc) 1320 dev_err(hdev->dev, 1321 "Failed to disable PCI access of device CPU\n"); 1322 1323 /* Shut down the device */ 1324 pci_disable_device(hdev->pdev); 1325 pci_set_power_state(hdev->pdev, PCI_D3hot); 1326 1327 return 0; 1328 } 1329 1330 /* 1331 * hl_device_resume - initiate device resume 1332 * 1333 * @hdev: pointer to habanalabs device structure 1334 * 1335 * Bring the hw back to operating state (all asics). 1336 * Returns 0 for success or an error on failure. 1337 * Called at driver resume. 1338 */ 1339 int hl_device_resume(struct hl_device *hdev) 1340 { 1341 int rc; 1342 1343 pci_set_power_state(hdev->pdev, PCI_D0); 1344 pci_restore_state(hdev->pdev); 1345 rc = pci_enable_device_mem(hdev->pdev); 1346 if (rc) { 1347 dev_err(hdev->dev, 1348 "Failed to enable PCI device in resume\n"); 1349 return rc; 1350 } 1351 1352 pci_set_master(hdev->pdev); 1353 1354 rc = hdev->asic_funcs->resume(hdev); 1355 if (rc) { 1356 dev_err(hdev->dev, "Failed to resume device after suspend\n"); 1357 goto disable_device; 1358 } 1359 1360 1361 /* 'in_reset' was set to true during suspend, now we must clear it in order 1362 * for hard reset to be performed 1363 */ 1364 spin_lock(&hdev->reset_info.lock); 1365 hdev->reset_info.in_reset = 0; 1366 spin_unlock(&hdev->reset_info.lock); 1367 1368 rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); 1369 if (rc) { 1370 dev_err(hdev->dev, "Failed to reset device during resume\n"); 1371 goto disable_device; 1372 } 1373 1374 return 0; 1375 1376 disable_device: 1377 pci_disable_device(hdev->pdev); 1378 1379 return rc; 1380 } 1381 1382 static int device_kill_open_processes(struct hl_device *hdev, u32 timeout, bool control_dev) 1383 { 1384 struct task_struct *task = NULL; 1385 struct list_head *hpriv_list; 1386 struct hl_fpriv *hpriv; 1387 struct mutex *hpriv_lock; 1388 u32 pending_cnt; 1389 1390 hpriv_lock = control_dev ? 
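	/*
	 * hl_device_suspend()/hl_device_resume() are the PM entry points referred
	 * to above ("Called at driver suspend/resume"). Illustrative sketch of how
	 * the PCI glue might wire them up via dev_pm_ops; the function and ops
	 * names here are assumptions and the real glue lives outside this file:
	 *
	 *	static int hl_pmops_suspend(struct device *dev)
	 *	{
	 *		struct hl_device *hdev = dev_get_drvdata(dev);
	 *
	 *		return hdev ? hl_device_suspend(hdev) : 0;
	 *	}
	 *
	 *	// with a matching hl_pmops_resume() calling hl_device_resume():
	 *	static SIMPLE_DEV_PM_OPS(hl_pm_ops, hl_pmops_suspend, hl_pmops_resume);
	 */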
&hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	/* Give the user time to close the FD, and processes that are inside
	 * hl_device_open time to finish
	 */
	if (!list_empty(hpriv_list))
		ssleep(1);

	if (timeout) {
		pending_cnt = timeout;
	} else {
		if (hdev->process_kill_trial_cnt) {
			/* Processes have already been killed */
			pending_cnt = 1;
			goto wait_for_processes;
		} else {
			/* Wait a small period after process kill */
			pending_cnt = HL_PENDING_RESET_PER_SEC;
		}
	}

	mutex_lock(hpriv_lock);

	/* This section must be protected because we are dereferencing
	 * pointers that are freed if the process exits
	 */
	list_for_each_entry(hpriv, hpriv_list, dev_node) {
		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
		if (task) {
			dev_info(hdev->dev, "Killing user process pid=%d\n",
				task_pid_nr(task));
			send_sig(SIGKILL, task, 1);
			usleep_range(1000, 10000);

			put_task_struct(task);
		} else {
			dev_dbg(hdev->dev,
				"Can't get task struct for user process %d, process was killed from outside the driver\n",
				pid_nr(hpriv->taskpid));
		}
	}

	mutex_unlock(hpriv_lock);

	/*
	 * We killed the open users, but that doesn't mean they are closed.
	 * It could be that they are running a long cleanup phase in the driver,
	 * e.g. MMU unmappings, or running another long teardown flow even before
	 * our cleanup.
	 * Therefore we need to wait again to make sure they are closed before
	 * continuing with the reset.
	 */

wait_for_processes:
	while ((!list_empty(hpriv_list)) && (pending_cnt)) {
		dev_dbg(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	/* All processes exited successfully */
	if (list_empty(hpriv_list))
		return 0;

	/* Give up waiting for processes to exit */
	if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
		return -ETIME;

	hdev->process_kill_trial_cnt++;

	return -EBUSY;
}

static void device_disable_open_processes(struct hl_device *hdev, bool control_dev)
{
	struct list_head *hpriv_list;
	struct hl_fpriv *hpriv;
	struct mutex *hpriv_lock;

	hpriv_lock = control_dev ? &hdev->fpriv_ctrl_list_lock : &hdev->fpriv_list_lock;
	hpriv_list = control_dev ? &hdev->fpriv_ctrl_list : &hdev->fpriv_list;

	mutex_lock(hpriv_lock);
	list_for_each_entry(hpriv, hpriv_list, dev_node)
		hpriv->hdev = NULL;
	mutex_unlock(hpriv_lock);
}

static void send_disable_pci_access(struct hl_device *hdev, u32 flags)
{
	/* If the reset is due to heartbeat, the device CPU is not responsive,
	 * in which case there is no point in sending it the PCI disable message.
	 */
	if ((flags & HL_DRV_RESET_HARD) &&
			!(flags & (HL_DRV_RESET_HEARTBEAT | HL_DRV_RESET_BYPASS_REQ_TO_FW))) {
		/* Disable PCI access from the device F/W so it won't send
		 * us additional interrupts. We disable MSI/MSI-X at
		 * the halt_engines function and we can't have the F/W
		 * sending us interrupts after that. We need to disable
		 * the access here because if the device is marked
		 * disabled, the message won't be sent.
Also, in case 1495 * of heartbeat, the device CPU is marked as disable 1496 * so this message won't be sent 1497 */ 1498 if (hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0)) { 1499 dev_warn(hdev->dev, "Failed to disable FW's PCI access\n"); 1500 return; 1501 } 1502 1503 /* verify that last EQs are handled before disabled is set */ 1504 if (hdev->cpu_queues_enable) 1505 synchronize_irq(pci_irq_vector(hdev->pdev, 1506 hdev->asic_prop.eq_interrupt_id)); 1507 } 1508 } 1509 1510 static void handle_reset_trigger(struct hl_device *hdev, u32 flags) 1511 { 1512 u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 1513 1514 /* No consecutive mechanism when user context exists */ 1515 if (hdev->is_compute_ctx_active) 1516 return; 1517 1518 /* 1519 * 'reset cause' is being updated here, because getting here 1520 * means that it's the 1st time and the last time we're here 1521 * ('in_reset' makes sure of it). This makes sure that 1522 * 'reset_cause' will continue holding its 1st recorded reason! 1523 */ 1524 if (flags & HL_DRV_RESET_HEARTBEAT) { 1525 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT; 1526 cur_reset_trigger = HL_DRV_RESET_HEARTBEAT; 1527 } else if (flags & HL_DRV_RESET_TDR) { 1528 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_TDR; 1529 cur_reset_trigger = HL_DRV_RESET_TDR; 1530 } else if (flags & HL_DRV_RESET_FW_FATAL_ERR) { 1531 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; 1532 cur_reset_trigger = HL_DRV_RESET_FW_FATAL_ERR; 1533 } else { 1534 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; 1535 } 1536 1537 /* 1538 * If reset cause is same twice, then reset_trigger_repeated 1539 * is set and if this reset is due to a fatal FW error 1540 * device is set to an unstable state. 1541 */ 1542 if (hdev->reset_info.prev_reset_trigger != cur_reset_trigger) { 1543 hdev->reset_info.prev_reset_trigger = cur_reset_trigger; 1544 hdev->reset_info.reset_trigger_repeated = 0; 1545 } else { 1546 hdev->reset_info.reset_trigger_repeated = 1; 1547 } 1548 } 1549 1550 /* 1551 * hl_device_reset - reset the device 1552 * 1553 * @hdev: pointer to habanalabs device structure 1554 * @flags: reset flags. 1555 * 1556 * Block future CS and wait for pending CS to be enqueued 1557 * Call ASIC H/W fini 1558 * Flush all completions 1559 * Re-initialize all internal data structures 1560 * Call ASIC H/W init, late_init 1561 * Test queues 1562 * Enable device 1563 * 1564 * Returns 0 for success or an error on failure. 
1565 */ 1566 int hl_device_reset(struct hl_device *hdev, u32 flags) 1567 { 1568 bool hard_reset, from_hard_reset_thread, fw_reset, reset_upon_device_release, 1569 schedule_hard_reset = false, delay_reset, from_dev_release, from_watchdog_thread; 1570 u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0}; 1571 struct hl_ctx *ctx; 1572 int i, rc, hw_fini_rc; 1573 1574 if (!hdev->init_done) { 1575 dev_err(hdev->dev, "Can't reset before initialization is done\n"); 1576 return 0; 1577 } 1578 1579 hard_reset = !!(flags & HL_DRV_RESET_HARD); 1580 from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR); 1581 fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW); 1582 from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE); 1583 delay_reset = !!(flags & HL_DRV_RESET_DELAY); 1584 from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); 1585 reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release; 1586 1587 if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { 1588 dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); 1589 return 0; 1590 } 1591 1592 if (!hard_reset && !hdev->asic_prop.supports_compute_reset) { 1593 dev_dbg(hdev->dev, "asic doesn't support compute reset - do hard-reset instead\n"); 1594 hard_reset = true; 1595 } 1596 1597 if (reset_upon_device_release) { 1598 if (hard_reset) { 1599 dev_crit(hdev->dev, 1600 "Aborting reset because hard-reset is mutually exclusive with reset-on-device-release\n"); 1601 return -EINVAL; 1602 } 1603 1604 goto do_reset; 1605 } 1606 1607 if (!hard_reset && !hdev->asic_prop.allow_inference_soft_reset) { 1608 dev_dbg(hdev->dev, 1609 "asic doesn't allow inference soft reset - do hard-reset instead\n"); 1610 hard_reset = true; 1611 } 1612 1613 do_reset: 1614 /* Re-entry of reset thread */ 1615 if (from_hard_reset_thread && hdev->process_kill_trial_cnt) 1616 goto kill_processes; 1617 1618 /* 1619 * Prevent concurrency in this function - only one reset should be 1620 * done at any given time. We need to perform this only if we didn't 1621 * get here from a dedicated hard reset thread. 1622 */ 1623 if (!from_hard_reset_thread) { 1624 /* Block future CS/VM/JOB completion operations */ 1625 spin_lock(&hdev->reset_info.lock); 1626 if (hdev->reset_info.in_reset) { 1627 /* We allow scheduling of a hard reset only during a compute reset */ 1628 if (hard_reset && hdev->reset_info.in_compute_reset) 1629 hdev->reset_info.hard_reset_schedule_flags = flags; 1630 spin_unlock(&hdev->reset_info.lock); 1631 return 0; 1632 } 1633 1634 /* This still allows the completion of some KDMA ops 1635 * Update this before in_reset because in_compute_reset implies we are in reset 1636 */ 1637 hdev->reset_info.in_compute_reset = !hard_reset; 1638 1639 hdev->reset_info.in_reset = 1; 1640 1641 spin_unlock(&hdev->reset_info.lock); 1642 1643 /* Cancel the device release watchdog work if required. 1644 * In case of reset-upon-device-release while the release watchdog work is 1645 * scheduled due to a hard-reset, do hard-reset instead of compute-reset. 
1646 */ 1647 if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) { 1648 struct hl_device_reset_work *watchdog_work = 1649 &hdev->device_release_watchdog_work; 1650 1651 hdev->reset_info.watchdog_active = 0; 1652 if (!from_watchdog_thread) 1653 cancel_delayed_work_sync(&watchdog_work->reset_work); 1654 1655 if (from_dev_release && (watchdog_work->flags & HL_DRV_RESET_HARD)) { 1656 hdev->reset_info.in_compute_reset = 0; 1657 flags |= HL_DRV_RESET_HARD; 1658 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1659 hard_reset = true; 1660 } 1661 } 1662 1663 if (delay_reset) 1664 usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1); 1665 1666 escalate_reset_flow: 1667 handle_reset_trigger(hdev, flags); 1668 send_disable_pci_access(hdev, flags); 1669 1670 /* This also blocks future CS/VM/JOB completion operations */ 1671 hdev->disabled = true; 1672 1673 take_release_locks(hdev); 1674 1675 if (hard_reset) 1676 dev_info(hdev->dev, "Going to reset device\n"); 1677 else if (reset_upon_device_release) 1678 dev_dbg(hdev->dev, "Going to reset device after release by user\n"); 1679 else 1680 dev_dbg(hdev->dev, "Going to reset engines of inference device\n"); 1681 } 1682 1683 if ((hard_reset) && (!from_hard_reset_thread)) { 1684 hdev->reset_info.hard_reset_pending = true; 1685 1686 hdev->process_kill_trial_cnt = 0; 1687 1688 hdev->device_reset_work.flags = flags; 1689 1690 /* 1691 * Because the reset function can't run from heartbeat work, 1692 * we need to call the reset function from a dedicated work. 1693 */ 1694 queue_delayed_work(hdev->reset_wq, &hdev->device_reset_work.reset_work, 0); 1695 1696 return 0; 1697 } 1698 1699 cleanup_resources(hdev, hard_reset, fw_reset, from_dev_release); 1700 1701 kill_processes: 1702 if (hard_reset) { 1703 /* Kill processes here after CS rollback. This is because the 1704 * process can't really exit until all its CSs are done, which 1705 * is what we do in cs rollback 1706 */ 1707 rc = device_kill_open_processes(hdev, 0, false); 1708 1709 if (rc == -EBUSY) { 1710 if (hdev->device_fini_pending) { 1711 dev_crit(hdev->dev, 1712 "%s Failed to kill all open processes, stopping hard reset\n", 1713 dev_name(&(hdev)->pdev->dev)); 1714 goto out_err; 1715 } 1716 1717 /* signal reset thread to reschedule */ 1718 return rc; 1719 } 1720 1721 if (rc) { 1722 dev_crit(hdev->dev, 1723 "%s Failed to kill all open processes, stopping hard reset\n", 1724 dev_name(&(hdev)->pdev->dev)); 1725 goto out_err; 1726 } 1727 1728 /* Flush the Event queue workers to make sure no other thread is 1729 * reading or writing to registers during the reset 1730 */ 1731 flush_workqueue(hdev->eq_wq); 1732 } 1733 1734 /* Reset the H/W. 
It will be in idle state after this returns */ 1735 hw_fini_rc = hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset); 1736 1737 if (hard_reset) { 1738 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; 1739 1740 /* Release kernel context */ 1741 if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1) 1742 hdev->kernel_ctx = NULL; 1743 1744 hl_vm_fini(hdev); 1745 hl_mmu_fini(hdev); 1746 hl_eq_reset(hdev, &hdev->event_queue); 1747 } 1748 1749 /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */ 1750 hl_hw_queue_reset(hdev, hard_reset); 1751 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1752 hl_cq_reset(hdev, &hdev->completion_queue[i]); 1753 1754 /* Make sure the context switch phase will run again */ 1755 ctx = hl_get_compute_ctx(hdev); 1756 if (ctx) { 1757 atomic_set(&ctx->thread_ctx_switch_token, 1); 1758 ctx->thread_ctx_switch_wait_token = 0; 1759 hl_ctx_put(ctx); 1760 } 1761 1762 if (hw_fini_rc) { 1763 rc = hw_fini_rc; 1764 goto out_err; 1765 } 1766 /* Finished tear-down, starting to re-initialize */ 1767 1768 if (hard_reset) { 1769 hdev->device_cpu_disabled = false; 1770 hdev->reset_info.hard_reset_pending = false; 1771 1772 /* 1773 * Put the device in an unusable state if there are 2 back to back resets due to 1774 * fatal errors. 1775 */ 1776 if (hdev->reset_info.reset_trigger_repeated && 1777 (hdev->reset_info.prev_reset_trigger == HL_DRV_RESET_FW_FATAL_ERR || 1778 hdev->reset_info.prev_reset_trigger == 1779 HL_DRV_RESET_HEARTBEAT)) { 1780 dev_crit(hdev->dev, 1781 "%s Consecutive fatal errors, stopping hard reset\n", 1782 dev_name(&(hdev)->pdev->dev)); 1783 rc = -EIO; 1784 goto out_err; 1785 } 1786 1787 if (hdev->kernel_ctx) { 1788 dev_crit(hdev->dev, 1789 "%s kernel ctx was alive during hard reset, something is terribly wrong\n", 1790 dev_name(&(hdev)->pdev->dev)); 1791 rc = -EBUSY; 1792 goto out_err; 1793 } 1794 1795 rc = hl_mmu_init(hdev); 1796 if (rc) { 1797 dev_err(hdev->dev, 1798 "Failed to initialize MMU S/W after hard reset\n"); 1799 goto out_err; 1800 } 1801 1802 /* Allocate the kernel context */ 1803 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), 1804 GFP_KERNEL); 1805 if (!hdev->kernel_ctx) { 1806 rc = -ENOMEM; 1807 hl_mmu_fini(hdev); 1808 goto out_err; 1809 } 1810 1811 hdev->is_compute_ctx_active = false; 1812 1813 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); 1814 if (rc) { 1815 dev_err(hdev->dev, 1816 "failed to init kernel ctx in hard reset\n"); 1817 kfree(hdev->kernel_ctx); 1818 hdev->kernel_ctx = NULL; 1819 hl_mmu_fini(hdev); 1820 goto out_err; 1821 } 1822 } 1823 1824 /* Device is now enabled as part of the initialization requires 1825 * communication with the device firmware to get information that 1826 * is required for the initialization itself 1827 */ 1828 hdev->disabled = false; 1829 1830 /* F/W security enabled indication might be updated after hard-reset */ 1831 if (hard_reset) { 1832 rc = hl_fw_read_preboot_status(hdev); 1833 if (rc) 1834 goto out_err; 1835 } 1836 1837 rc = hdev->asic_funcs->hw_init(hdev); 1838 if (rc) { 1839 dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); 1840 goto out_err; 1841 } 1842 1843 /* If device is not idle fail the reset process */ 1844 if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask, 1845 HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) { 1846 print_idle_status_mask(hdev, "device is not idle after reset", idle_mask); 1847 rc = -EIO; 1848 goto out_err; 1849 } 1850 1851 /* Check that the communication with the device is working */ 1852 rc = hdev->asic_funcs->test_queues(hdev); 1853 if 
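	/*
	 * The reset flags seen above select the flavour of the flow. Two
	 * representative invocations from this file (illustrative recap, not new
	 * call sites):
	 *
	 *	// heartbeat failure path in hl_device_heartbeat():
	 *	hl_device_cond_reset(hdev, HL_DRV_RESET_HARD | HL_DRV_RESET_HEARTBEAT,
	 *				event_mask);
	 *
	 *	// compute ("soft") reset when the user releases the device,
	 *	// from hpriv_release():
	 *	hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
	 */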
(rc) { 1854 dev_err(hdev->dev, "Failed to detect if device is alive after reset\n"); 1855 goto out_err; 1856 } 1857 1858 if (hard_reset) { 1859 rc = device_late_init(hdev); 1860 if (rc) { 1861 dev_err(hdev->dev, "Failed late init after hard reset\n"); 1862 goto out_err; 1863 } 1864 1865 rc = hl_vm_init(hdev); 1866 if (rc) { 1867 dev_err(hdev->dev, "Failed to init memory module after hard reset\n"); 1868 goto out_err; 1869 } 1870 1871 if (!hdev->asic_prop.fw_security_enabled) 1872 hl_fw_set_max_power(hdev); 1873 } else { 1874 rc = hdev->asic_funcs->compute_reset_late_init(hdev); 1875 if (rc) { 1876 if (reset_upon_device_release) 1877 dev_err(hdev->dev, 1878 "Failed late init in reset after device release\n"); 1879 else 1880 dev_err(hdev->dev, "Failed late init after compute reset\n"); 1881 goto out_err; 1882 } 1883 } 1884 1885 rc = hdev->asic_funcs->scrub_device_mem(hdev); 1886 if (rc) { 1887 dev_err(hdev->dev, "scrub mem failed from device reset (%d)\n", rc); 1888 goto out_err; 1889 } 1890 1891 spin_lock(&hdev->reset_info.lock); 1892 hdev->reset_info.in_compute_reset = 0; 1893 1894 /* Schedule hard reset only if requested and if not already in hard reset. 1895 * We keep 'in_reset' enabled, so no other reset can go in during the hard 1896 * reset schedule 1897 */ 1898 if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags) 1899 schedule_hard_reset = true; 1900 else 1901 hdev->reset_info.in_reset = 0; 1902 1903 spin_unlock(&hdev->reset_info.lock); 1904 1905 hdev->reset_info.needs_reset = false; 1906 1907 if (hard_reset) 1908 dev_info(hdev->dev, 1909 "Successfully finished resetting the %s device\n", 1910 dev_name(&(hdev)->pdev->dev)); 1911 else 1912 dev_dbg(hdev->dev, 1913 "Successfully finished resetting the %s device\n", 1914 dev_name(&(hdev)->pdev->dev)); 1915 1916 if (hard_reset) { 1917 hdev->reset_info.hard_reset_cnt++; 1918 1919 /* After reset is done, we are ready to receive events from 1920 * the F/W. We can't do it before because we will ignore events 1921 * and if those events are fatal, we won't know about it and 1922 * the device will be operational although it shouldn't be 1923 */ 1924 hdev->asic_funcs->enable_events_from_fw(hdev); 1925 } else { 1926 if (!reset_upon_device_release) 1927 hdev->reset_info.compute_reset_cnt++; 1928 1929 if (schedule_hard_reset) { 1930 dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n"); 1931 flags = hdev->reset_info.hard_reset_schedule_flags; 1932 hdev->reset_info.hard_reset_schedule_flags = 0; 1933 hard_reset = true; 1934 goto escalate_reset_flow; 1935 } 1936 } 1937 1938 return 0; 1939 1940 out_err: 1941 hdev->disabled = true; 1942 1943 spin_lock(&hdev->reset_info.lock); 1944 hdev->reset_info.in_compute_reset = 0; 1945 1946 if (hard_reset) { 1947 dev_err(hdev->dev, 1948 "%s Failed to reset! Device is NOT usable\n", 1949 dev_name(&(hdev)->pdev->dev)); 1950 hdev->reset_info.hard_reset_cnt++; 1951 } else { 1952 if (reset_upon_device_release) { 1953 dev_err(hdev->dev, "Failed to reset device after user release\n"); 1954 flags &= ~HL_DRV_RESET_DEV_RELEASE; 1955 } else { 1956 dev_err(hdev->dev, "Failed to do compute reset\n"); 1957 hdev->reset_info.compute_reset_cnt++; 1958 } 1959 1960 spin_unlock(&hdev->reset_info.lock); 1961 flags |= HL_DRV_RESET_HARD; 1962 hard_reset = true; 1963 goto escalate_reset_flow; 1964 } 1965 1966 hdev->reset_info.in_reset = 0; 1967 1968 spin_unlock(&hdev->reset_info.lock); 1969 1970 return rc; 1971 } 1972 1973 /* 1974 * hl_device_cond_reset() - conditionally reset the device. 
1975 * @hdev: pointer to habanalabs device structure. 1976 * @flags: reset flags. 1977 * @event_mask: events to notify user about. 1978 * 1979 * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device 1980 * unless another reset precedes it. 1981 */ 1982 int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask) 1983 { 1984 struct hl_ctx *ctx = NULL; 1985 1986 /* F/W reset cannot be postponed */ 1987 if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW) 1988 goto device_reset; 1989 1990 /* Device release watchdog is relevant only if user exists and gets a reset notification */ 1991 if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) { 1992 dev_err(hdev->dev, "Resetting device without a reset indication to user\n"); 1993 goto device_reset; 1994 } 1995 1996 ctx = hl_get_compute_ctx(hdev); 1997 if (!ctx) 1998 goto device_reset; 1999 2000 /* 2001 * There is no point in postponing the reset if user is not registered for events. 2002 * However, if no eventfd_ctx exists but the device release watchdog is already scheduled, it 2003 * just implies that user has unregistered as part of handling a previous event. In this 2004 * case an immediate reset is not required. 2005 */ 2006 if (!ctx->hpriv->notifier_event.eventfd && !hdev->reset_info.watchdog_active) 2007 goto device_reset; 2008 2009 /* Schedule the device release watchdog work unless reset is already in progress or if the 2010 * work is already scheduled. 2011 */ 2012 spin_lock(&hdev->reset_info.lock); 2013 if (hdev->reset_info.in_reset) { 2014 spin_unlock(&hdev->reset_info.lock); 2015 goto device_reset; 2016 } 2017 2018 if (hdev->reset_info.watchdog_active) { 2019 hdev->device_release_watchdog_work.flags |= flags; 2020 goto out; 2021 } 2022 2023 hdev->device_release_watchdog_work.flags = flags; 2024 dev_dbg(hdev->dev, "Device is going to be hard-reset in %u sec unless being released\n", 2025 hdev->device_release_watchdog_timeout_sec); 2026 schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work, 2027 msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000)); 2028 hdev->reset_info.watchdog_active = 1; 2029 out: 2030 spin_unlock(&hdev->reset_info.lock); 2031 2032 hl_notifier_event_send_all(hdev, event_mask); 2033 2034 hl_ctx_put(ctx); 2035 2036 hl_abort_waiting_for_completions(hdev); 2037 2038 return 0; 2039 2040 device_reset: 2041 if (event_mask) 2042 hl_notifier_event_send_all(hdev, event_mask); 2043 if (ctx) 2044 hl_ctx_put(ctx); 2045 2046 return hl_device_reset(hdev, flags | HL_DRV_RESET_HARD); 2047 } 2048 2049 static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask) 2050 { 2051 mutex_lock(&notifier_event->lock); 2052 notifier_event->events_mask |= event_mask; 2053 2054 if (notifier_event->eventfd) 2055 eventfd_signal(notifier_event->eventfd); 2056 2057 mutex_unlock(&notifier_event->lock); 2058 } 2059 2060 /* 2061 * hl_notifier_event_send_all - notify all user processes via eventfd 2062 * 2063 * @hdev: pointer to habanalabs device structure 2064 * @event_mask: the event(s) that occurred 2065 *
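 * Records the event(s) in each process' notifier events mask and signals its eventfd if one is registered.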
2066 */ 2067 void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask) 2068 { 2069 struct hl_fpriv *hpriv; 2070 2071 if (!event_mask) { 2072 dev_warn(hdev->dev, "Skip sending zero event"); 2073 return; 2074 } 2075 2076 mutex_lock(&hdev->fpriv_list_lock); 2077 2078 list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) 2079 hl_notifier_event_send(&hpriv->notifier_event, event_mask); 2080 2081 mutex_unlock(&hdev->fpriv_list_lock); 2082 } 2083 2084 /* 2085 * hl_device_init - main initialization function for habanalabs device 2086 * 2087 * @hdev: pointer to habanalabs device structure 2088 * 2089 * Allocate an id for the device, do early initialization and then call the 2090 * ASIC specific initialization functions. Finally, create the cdev and the 2091 * Linux device to expose it to the user 2092 */ 2093 int hl_device_init(struct hl_device *hdev) 2094 { 2095 int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt; 2096 struct hl_ts_free_jobs *free_jobs_data; 2097 bool expose_interfaces_on_err = false; 2098 void *p; 2099 2100 /* Initialize ASIC function pointers and perform early init */ 2101 rc = device_early_init(hdev); 2102 if (rc) 2103 goto out_disabled; 2104 2105 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + 2106 hdev->asic_prop.user_interrupt_count; 2107 2108 if (user_interrupt_cnt) { 2109 hdev->user_interrupt = kcalloc(user_interrupt_cnt, sizeof(*hdev->user_interrupt), 2110 GFP_KERNEL); 2111 if (!hdev->user_interrupt) { 2112 rc = -ENOMEM; 2113 goto early_fini; 2114 } 2115 2116 /* Timestamp records supported only if CQ supported in device */ 2117 if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) { 2118 for (i = 0 ; i < user_interrupt_cnt ; i++) { 2119 p = vzalloc(TIMESTAMP_FREE_NODES_NUM * 2120 sizeof(struct timestamp_reg_free_node)); 2121 if (!p) { 2122 rc = -ENOMEM; 2123 goto free_usr_intr_mem; 2124 } 2125 free_jobs_data = &hdev->user_interrupt[i].ts_free_jobs_data; 2126 free_jobs_data->free_nodes_pool = p; 2127 free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM; 2128 free_jobs_data->next_avail_free_node_idx = 0; 2129 } 2130 } 2131 } 2132 2133 free_jobs_data = &hdev->common_user_cq_interrupt.ts_free_jobs_data; 2134 p = vzalloc(TIMESTAMP_FREE_NODES_NUM * 2135 sizeof(struct timestamp_reg_free_node)); 2136 if (!p) { 2137 rc = -ENOMEM; 2138 goto free_usr_intr_mem; 2139 } 2140 2141 free_jobs_data->free_nodes_pool = p; 2142 free_jobs_data->free_nodes_length = TIMESTAMP_FREE_NODES_NUM; 2143 free_jobs_data->next_avail_free_node_idx = 0; 2144 2145 /* 2146 * Start calling ASIC initialization. First S/W then H/W and finally 2147 * late init 2148 */ 2149 rc = hdev->asic_funcs->sw_init(hdev); 2150 if (rc) 2151 goto free_common_usr_intr_mem; 2152 2153 2154 /* initialize completion structure for multi CS wait */ 2155 hl_multi_cs_completion_init(hdev); 2156 2157 /* 2158 * Initialize the H/W queues. Must be done before hw_init, because 2159 * there the addresses of the kernel queue are being written to the 2160 * registers of the device 2161 */ 2162 rc = hl_hw_queues_create(hdev); 2163 if (rc) { 2164 dev_err(hdev->dev, "failed to initialize kernel queues\n"); 2165 goto sw_fini; 2166 } 2167 2168 cq_cnt = hdev->asic_prop.completion_queues_count; 2169 2170 /* 2171 * Initialize the completion queues. 
Must be done before hw_init, 2172 * because there the addresses of the completion queues are being 2173 * passed as arguments to request_irq 2174 */ 2175 if (cq_cnt) { 2176 hdev->completion_queue = kcalloc(cq_cnt, 2177 sizeof(*hdev->completion_queue), 2178 GFP_KERNEL); 2179 2180 if (!hdev->completion_queue) { 2181 dev_err(hdev->dev, 2182 "failed to allocate completion queues\n"); 2183 rc = -ENOMEM; 2184 goto hw_queues_destroy; 2185 } 2186 } 2187 2188 for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) { 2189 rc = hl_cq_init(hdev, &hdev->completion_queue[i], 2190 hdev->asic_funcs->get_queue_id_for_cq(hdev, i)); 2191 if (rc) { 2192 dev_err(hdev->dev, 2193 "failed to initialize completion queue\n"); 2194 goto cq_fini; 2195 } 2196 hdev->completion_queue[i].cq_idx = i; 2197 } 2198 2199 hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs, 2200 sizeof(struct hl_cs *), GFP_KERNEL); 2201 if (!hdev->shadow_cs_queue) { 2202 rc = -ENOMEM; 2203 goto cq_fini; 2204 } 2205 2206 /* 2207 * Initialize the event queue. Must be done before hw_init, 2208 * because there the address of the event queue is being 2209 * passed as argument to request_irq 2210 */ 2211 rc = hl_eq_init(hdev, &hdev->event_queue); 2212 if (rc) { 2213 dev_err(hdev->dev, "failed to initialize event queue\n"); 2214 goto free_shadow_cs_queue; 2215 } 2216 2217 /* MMU S/W must be initialized before kernel context is created */ 2218 rc = hl_mmu_init(hdev); 2219 if (rc) { 2220 dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n"); 2221 goto eq_fini; 2222 } 2223 2224 /* Allocate the kernel context */ 2225 hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); 2226 if (!hdev->kernel_ctx) { 2227 rc = -ENOMEM; 2228 goto mmu_fini; 2229 } 2230 2231 hdev->is_compute_ctx_active = false; 2232 2233 hdev->asic_funcs->state_dump_init(hdev); 2234 2235 hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC; 2236 2237 hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL; 2238 2239 rc = hl_debugfs_device_init(hdev); 2240 if (rc) { 2241 dev_err(hdev->dev, "failed to initialize debugfs entry structure\n"); 2242 kfree(hdev->kernel_ctx); 2243 goto mmu_fini; 2244 } 2245 2246 /* The debugfs entry structure is accessed in hl_ctx_init(), so it must be called after 2247 * hl_debugfs_device_init(). 2248 */ 2249 rc = hl_ctx_init(hdev, hdev->kernel_ctx, true); 2250 if (rc) { 2251 dev_err(hdev->dev, "failed to initialize kernel context\n"); 2252 kfree(hdev->kernel_ctx); 2253 goto debugfs_device_fini; 2254 } 2255 2256 rc = hl_cb_pool_init(hdev); 2257 if (rc) { 2258 dev_err(hdev->dev, "failed to initialize CB pool\n"); 2259 goto release_ctx; 2260 } 2261 2262 rc = hl_dec_init(hdev); 2263 if (rc) { 2264 dev_err(hdev->dev, "Failed to initialize the decoder module\n"); 2265 goto cb_pool_fini; 2266 } 2267 2268 /* 2269 * From this point, override rc (=0) in case of an error to allow debugging 2270 * (by adding char devices and creating sysfs/debugfs files as part of the error flow). 
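 * The interfaces are actually exposed in the out_disabled error path, guarded by expose_interfaces_on_err.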
2271 */ 2272 expose_interfaces_on_err = true; 2273 2274 /* Device is now enabled as part of the initialization requires 2275 * communication with the device firmware to get information that 2276 * is required for the initialization itself 2277 */ 2278 hdev->disabled = false; 2279 2280 rc = hdev->asic_funcs->hw_init(hdev); 2281 if (rc) { 2282 dev_err(hdev->dev, "failed to initialize the H/W\n"); 2283 rc = 0; 2284 goto out_disabled; 2285 } 2286 2287 /* Check that the communication with the device is working */ 2288 rc = hdev->asic_funcs->test_queues(hdev); 2289 if (rc) { 2290 dev_err(hdev->dev, "Failed to detect if device is alive\n"); 2291 rc = 0; 2292 goto out_disabled; 2293 } 2294 2295 rc = device_late_init(hdev); 2296 if (rc) { 2297 dev_err(hdev->dev, "Failed late initialization\n"); 2298 rc = 0; 2299 goto out_disabled; 2300 } 2301 2302 dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n", 2303 hdev->asic_name, 2304 hdev->asic_prop.dram_size / SZ_1G); 2305 2306 rc = hl_vm_init(hdev); 2307 if (rc) { 2308 dev_err(hdev->dev, "Failed to initialize memory module\n"); 2309 rc = 0; 2310 goto out_disabled; 2311 } 2312 2313 /* 2314 * Expose devices and sysfs/debugfs files to user. 2315 * From here there is no need to expose them in case of an error. 2316 */ 2317 expose_interfaces_on_err = false; 2318 2319 rc = drm_dev_register(&hdev->drm, 0); 2320 if (rc) { 2321 dev_err(hdev->dev, "Failed to register DRM device, rc %d\n", rc); 2322 rc = 0; 2323 goto out_disabled; 2324 } 2325 2326 rc = cdev_sysfs_debugfs_add(hdev); 2327 if (rc) { 2328 dev_err(hdev->dev, "Failed to add char devices and sysfs/debugfs files\n"); 2329 rc = 0; 2330 goto out_disabled; 2331 } 2332 2333 /* Need to call this again because the max power might change, 2334 * depending on card type for certain ASICs 2335 */ 2336 if (hdev->asic_prop.set_max_power_on_device_init && 2337 !hdev->asic_prop.fw_security_enabled) 2338 hl_fw_set_max_power(hdev); 2339 2340 /* 2341 * hl_hwmon_init() must be called after device_late_init(), because only 2342 * there we get the information from the device about which 2343 * hwmon-related sensors the device supports. 2344 * Furthermore, it must be done after adding the device to the system. 2345 */ 2346 rc = hl_hwmon_init(hdev); 2347 if (rc) { 2348 dev_err(hdev->dev, "Failed to initialize hwmon\n"); 2349 rc = 0; 2350 goto out_disabled; 2351 } 2352 2353 dev_notice(hdev->dev, 2354 "Successfully added device %s to habanalabs driver\n", 2355 dev_name(&(hdev)->pdev->dev)); 2356 2357 /* After initialization is done, we are ready to receive events from 2358 * the F/W. 
We can't do it before because we will ignore events and if 2359 * those events are fatal, we won't know about it and the device will 2360 * be operational although it shouldn't be 2361 */ 2362 hdev->asic_funcs->enable_events_from_fw(hdev); 2363 2364 hdev->init_done = true; 2365 2366 return 0; 2367 2368 cb_pool_fini: 2369 hl_cb_pool_fini(hdev); 2370 release_ctx: 2371 if (hl_ctx_put(hdev->kernel_ctx) != 1) 2372 dev_err(hdev->dev, 2373 "kernel ctx is still alive on initialization failure\n"); 2374 debugfs_device_fini: 2375 hl_debugfs_device_fini(hdev); 2376 mmu_fini: 2377 hl_mmu_fini(hdev); 2378 eq_fini: 2379 hl_eq_fini(hdev, &hdev->event_queue); 2380 free_shadow_cs_queue: 2381 kfree(hdev->shadow_cs_queue); 2382 cq_fini: 2383 for (i = 0 ; i < cq_ready_cnt ; i++) 2384 hl_cq_fini(hdev, &hdev->completion_queue[i]); 2385 kfree(hdev->completion_queue); 2386 hw_queues_destroy: 2387 hl_hw_queues_destroy(hdev); 2388 sw_fini: 2389 hdev->asic_funcs->sw_fini(hdev); 2390 free_common_usr_intr_mem: 2391 vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool); 2392 free_usr_intr_mem: 2393 if (user_interrupt_cnt) { 2394 for (i = 0 ; i < user_interrupt_cnt ; i++) { 2395 if (!hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool) 2396 break; 2397 vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool); 2398 } 2399 kfree(hdev->user_interrupt); 2400 } 2401 early_fini: 2402 device_early_fini(hdev); 2403 out_disabled: 2404 hdev->disabled = true; 2405 if (expose_interfaces_on_err) { 2406 drm_dev_register(&hdev->drm, 0); 2407 cdev_sysfs_debugfs_add(hdev); 2408 } 2409 2410 pr_err("Failed to initialize accel%d. Device %s is NOT usable!\n", 2411 hdev->cdev_idx, dev_name(&hdev->pdev->dev)); 2412 2413 return rc; 2414 } 2415 2416 /* 2417 * hl_device_fini - main tear-down function for habanalabs device 2418 * 2419 * @hdev: pointer to habanalabs device structure 2420 * 2421 * Destroy the device, call ASIC fini functions and release the id 2422 */ 2423 void hl_device_fini(struct hl_device *hdev) 2424 { 2425 u32 user_interrupt_cnt; 2426 bool device_in_reset; 2427 ktime_t timeout; 2428 u64 reset_sec; 2429 int i, rc; 2430 2431 dev_info(hdev->dev, "Removing device %s\n", dev_name(&(hdev)->pdev->dev)); 2432 2433 hdev->device_fini_pending = 1; 2434 flush_delayed_work(&hdev->device_reset_work.reset_work); 2435 2436 if (hdev->pldm) 2437 reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT; 2438 else 2439 reset_sec = HL_HARD_RESET_MAX_TIMEOUT; 2440 2441 /* 2442 * This function is competing with the reset function, so try to 2443 * take the reset atomic and if we are already in middle of reset, 2444 * wait until reset function is finished. Reset function is designed 2445 * to always finish. 
However, in Gaudi, because of all the network 2446 * ports, the hard reset could take between 10 and 30 seconds 2447 */ 2448 2449 timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000); 2450 2451 spin_lock(&hdev->reset_info.lock); 2452 device_in_reset = !!hdev->reset_info.in_reset; 2453 if (!device_in_reset) 2454 hdev->reset_info.in_reset = 1; 2455 spin_unlock(&hdev->reset_info.lock); 2456 2457 while (device_in_reset) { 2458 usleep_range(50, 200); 2459 2460 spin_lock(&hdev->reset_info.lock); 2461 device_in_reset = !!hdev->reset_info.in_reset; 2462 if (!device_in_reset) 2463 hdev->reset_info.in_reset = 1; 2464 spin_unlock(&hdev->reset_info.lock); 2465 2466 if (ktime_compare(ktime_get(), timeout) > 0) { 2467 dev_crit(hdev->dev, 2468 "%s Failed to remove device because reset function did not finish\n", 2469 dev_name(&(hdev)->pdev->dev)); 2470 return; 2471 } 2472 } 2473 2474 cancel_delayed_work_sync(&hdev->device_release_watchdog_work.reset_work); 2475 2476 /* Disable PCI access from device F/W so it won't send us additional 2477 * interrupts. We disable MSI/MSI-X at the halt_engines function and we 2478 * can't have the F/W sending us interrupts after that. We need to 2479 * disable the access here because if the device is marked as disabled, the 2480 * message won't be sent. Also, in case of heartbeat, the device CPU is 2481 * marked as disabled so this message won't be sent 2482 */ 2483 hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS, 0x0); 2484 2485 /* Mark device as disabled */ 2486 hdev->disabled = true; 2487 2488 take_release_locks(hdev); 2489 2490 hdev->reset_info.hard_reset_pending = true; 2491 2492 hl_hwmon_fini(hdev); 2493 2494 cleanup_resources(hdev, true, false, false); 2495 2496 /* Kill processes here after CS rollback. This is because the process 2497 * can't really exit until all its CSs are done, which is what we 2498 * do in cs rollback 2499 */ 2500 dev_info(hdev->dev, 2501 "Waiting for all processes to exit (timeout of %u seconds)", 2502 HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI); 2503 2504 hdev->process_kill_trial_cnt = 0; 2505 rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false); 2506 if (rc) { 2507 dev_crit(hdev->dev, "Failed to kill all open processes (%d)\n", rc); 2508 device_disable_open_processes(hdev, false); 2509 } 2510 2511 hdev->process_kill_trial_cnt = 0; 2512 rc = device_kill_open_processes(hdev, 0, true); 2513 if (rc) { 2514 dev_crit(hdev->dev, "Failed to kill all control device open processes (%d)\n", rc); 2515 device_disable_open_processes(hdev, true); 2516 } 2517 2518 hl_cb_pool_fini(hdev); 2519 2520 /* Reset the H/W.
It will be in idle state after this returns */ 2521 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 2522 if (rc) 2523 dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); 2524 2525 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; 2526 2527 /* Release kernel context */ 2528 if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1)) 2529 dev_err(hdev->dev, "kernel ctx is still alive\n"); 2530 2531 hl_dec_fini(hdev); 2532 2533 hl_vm_fini(hdev); 2534 2535 hl_mmu_fini(hdev); 2536 2537 vfree(hdev->captured_err_info.page_fault_info.user_mappings); 2538 2539 hl_eq_fini(hdev, &hdev->event_queue); 2540 2541 kfree(hdev->shadow_cs_queue); 2542 2543 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 2544 hl_cq_fini(hdev, &hdev->completion_queue[i]); 2545 kfree(hdev->completion_queue); 2546 2547 user_interrupt_cnt = hdev->asic_prop.user_dec_intr_count + 2548 hdev->asic_prop.user_interrupt_count; 2549 2550 if (user_interrupt_cnt) { 2551 if (hdev->asic_prop.first_available_cq[0] != USHRT_MAX) { 2552 for (i = 0 ; i < user_interrupt_cnt ; i++) 2553 vfree(hdev->user_interrupt[i].ts_free_jobs_data.free_nodes_pool); 2554 } 2555 2556 kfree(hdev->user_interrupt); 2557 } 2558 2559 vfree(hdev->common_user_cq_interrupt.ts_free_jobs_data.free_nodes_pool); 2560 2561 hl_hw_queues_destroy(hdev); 2562 2563 /* Call ASIC S/W finalize function */ 2564 hdev->asic_funcs->sw_fini(hdev); 2565 2566 device_early_fini(hdev); 2567 2568 /* Hide devices and sysfs/debugfs files from user */ 2569 cdev_sysfs_debugfs_remove(hdev); 2570 drm_dev_unregister(&hdev->drm); 2571 2572 hl_debugfs_device_fini(hdev); 2573 2574 pr_info("removed device successfully\n"); 2575 } 2576 2577 /* 2578 * MMIO register access helper functions. 2579 */ 2580 2581 /* 2582 * hl_rreg - Read an MMIO register 2583 * 2584 * @hdev: pointer to habanalabs device structure 2585 * @reg: MMIO register offset (in bytes) 2586 * 2587 * Returns the value of the MMIO register we are asked to read 2588 * 2589 */ 2590 inline u32 hl_rreg(struct hl_device *hdev, u32 reg) 2591 { 2592 u32 val = readl(hdev->rmmio + reg); 2593 2594 if (unlikely(trace_habanalabs_rreg32_enabled())) 2595 trace_habanalabs_rreg32(hdev->dev, reg, val); 2596 2597 return val; 2598 } 2599 2600 /* 2601 * hl_wreg - Write to an MMIO register 2602 * 2603 * @hdev: pointer to habanalabs device structure 2604 * @reg: MMIO register offset (in bytes) 2605 * @val: 32-bit value 2606 * 2607 * Writes the 32-bit value into the MMIO register 2608 * 2609 */ 2610 inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val) 2611 { 2612 if (unlikely(trace_habanalabs_wreg32_enabled())) 2613 trace_habanalabs_wreg32(hdev->dev, reg, val); 2614 2615 writel(val, hdev->rmmio + reg); 2616 } 2617 2618 void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2619 u8 flags) 2620 { 2621 struct razwi_info *razwi_info = &hdev->captured_err_info.razwi_info; 2622 2623 if (num_of_engines > HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR) { 2624 dev_err(hdev->dev, 2625 "Number of possible razwi initiators (%u) exceeded limit (%u)\n", 2626 num_of_engines, HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR); 2627 return; 2628 } 2629 2630 /* In case it's the first razwi since the device was opened, capture its parameters */ 2631 if (atomic_cmpxchg(&hdev->captured_err_info.razwi_info.razwi_detected, 0, 1)) 2632 return; 2633 2634 razwi_info->razwi.timestamp = ktime_to_ns(ktime_get()); 2635 razwi_info->razwi.addr = addr; 2636 razwi_info->razwi.num_of_possible_engines = num_of_engines; 2637 
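/* Copy the possible initiator engine IDs; their number was validated above against HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR */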
memcpy(&razwi_info->razwi.engine_id[0], &engine_id[0], 2638 num_of_engines * sizeof(u16)); 2639 razwi_info->razwi.flags = flags; 2640 2641 razwi_info->razwi_info_available = true; 2642 } 2643 2644 void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines, 2645 u8 flags, u64 *event_mask) 2646 { 2647 hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags); 2648 2649 if (event_mask) 2650 *event_mask |= HL_NOTIFIER_EVENT_RAZWI; 2651 } 2652 2653 static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu) 2654 { 2655 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2656 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; 2657 struct hl_vm_hash_node *hnode; 2658 struct hl_userptr *userptr; 2659 enum vm_type *vm_type; 2660 struct hl_ctx *ctx; 2661 u32 map_idx = 0; 2662 int i; 2663 2664 /* Reset previous session count */ 2665 pgf_info->num_of_user_mappings = 0; 2666 2667 ctx = hl_get_compute_ctx(hdev); 2668 if (!ctx) { 2669 dev_err(hdev->dev, "Can't get user context for user mappings\n"); 2670 return; 2671 } 2672 2673 mutex_lock(&ctx->mem_hash_lock); 2674 hash_for_each(ctx->mem_hash, i, hnode, node) { 2675 vm_type = hnode->ptr; 2676 if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) || 2677 ((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu)) 2678 pgf_info->num_of_user_mappings++; 2679 2680 } 2681 2682 if (!pgf_info->num_of_user_mappings) 2683 goto finish; 2684 2685 /* In case a buffer was already allocated in a previous session, release it before 2686 * allocating a new one. 2687 */ 2688 vfree(pgf_info->user_mappings); 2689 pgf_info->user_mappings = 2690 vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping)); 2691 if (!pgf_info->user_mappings) { 2692 pgf_info->num_of_user_mappings = 0; 2693 goto finish; 2694 } 2695 2696 hash_for_each(ctx->mem_hash, i, hnode, node) { 2697 vm_type = hnode->ptr; 2698 if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) { 2699 userptr = hnode->ptr; 2700 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2701 pgf_info->user_mappings[map_idx].size = userptr->size; 2702 map_idx++; 2703 } else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) { 2704 phys_pg_pack = hnode->ptr; 2705 pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr; 2706 pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size; 2707 map_idx++; 2708 } 2709 } 2710 finish: 2711 mutex_unlock(&ctx->mem_hash_lock); 2712 hl_ctx_put(ctx); 2713 } 2714 2715 void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu) 2716 { 2717 struct page_fault_info *pgf_info = &hdev->captured_err_info.page_fault_info; 2718 2719 /* Capture only the first page fault */ 2720 if (atomic_cmpxchg(&pgf_info->page_fault_detected, 0, 1)) 2721 return; 2722 2723 pgf_info->page_fault.timestamp = ktime_to_ns(ktime_get()); 2724 pgf_info->page_fault.addr = addr; 2725 pgf_info->page_fault.engine_id = eng_id; 2726 hl_capture_user_mappings(hdev, is_pmmu); 2727 2728 pgf_info->page_fault_info_available = true; 2729 } 2730 2731 void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu, 2732 u64 *event_mask) 2733 { 2734 hl_capture_page_fault(hdev, addr, eng_id, is_pmmu); 2735 2736 if (event_mask) 2737 *event_mask |= HL_NOTIFIER_EVENT_PAGE_FAULT; 2738 } 2739 2740 static void hl_capture_hw_err(struct hl_device *hdev, u16 event_id) 2741 { 2742 struct hw_err_info *info = &hdev->captured_err_info.hw_err; 2743 2744 /* Capture only the first HW err */ 2745 if (atomic_cmpxchg(&info->event_detected, 0, 1)) 2746
return; 2747 2748 info->event.timestamp = ktime_to_ns(ktime_get()); 2749 info->event.event_id = event_id; 2750 2751 info->event_info_available = true; 2752 } 2753 2754 void hl_handle_critical_hw_err(struct hl_device *hdev, u16 event_id, u64 *event_mask) 2755 { 2756 hl_capture_hw_err(hdev, event_id); 2757 2758 if (event_mask) 2759 *event_mask |= HL_NOTIFIER_EVENT_CRITICL_HW_ERR; 2760 } 2761 2762 static void hl_capture_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *fw_info) 2763 { 2764 struct fw_err_info *info = &hdev->captured_err_info.fw_err; 2765 2766 /* Capture only the first FW error */ 2767 if (atomic_cmpxchg(&info->event_detected, 0, 1)) 2768 return; 2769 2770 info->event.timestamp = ktime_to_ns(ktime_get()); 2771 info->event.err_type = fw_info->err_type; 2772 if (fw_info->err_type == HL_INFO_FW_REPORTED_ERR) 2773 info->event.event_id = fw_info->event_id; 2774 2775 info->event_info_available = true; 2776 } 2777 2778 void hl_handle_fw_err(struct hl_device *hdev, struct hl_info_fw_err_info *info) 2779 { 2780 hl_capture_fw_err(hdev, info); 2781 2782 if (info->event_mask) 2783 *info->event_mask |= HL_NOTIFIER_EVENT_CRITICL_FW_ERR; 2784 } 2785 2786 void hl_capture_engine_err(struct hl_device *hdev, u16 engine_id, u16 error_count) 2787 { 2788 struct engine_err_info *info = &hdev->captured_err_info.engine_err; 2789 2790 /* Capture only the first engine error */ 2791 if (atomic_cmpxchg(&info->event_detected, 0, 1)) 2792 return; 2793 2794 info->event.timestamp = ktime_to_ns(ktime_get()); 2795 info->event.engine_id = engine_id; 2796 info->event.error_count = error_count; 2797 info->event_info_available = true; 2798 } 2799 2800 void hl_enable_err_info_capture(struct hl_error_info *captured_err_info) 2801 { 2802 vfree(captured_err_info->page_fault_info.user_mappings); 2803 memset(captured_err_info, 0, sizeof(struct hl_error_info)); 2804 atomic_set(&captured_err_info->cs_timeout.write_enable, 1); 2805 captured_err_info->undef_opcode.write_enable = true; 2806 } 2807 2808 void hl_init_cpu_for_irq(struct hl_device *hdev) 2809 { 2810 #ifdef CONFIG_NUMA 2811 struct cpumask *available_mask = &hdev->irq_affinity_mask; 2812 int numa_node = hdev->pdev->dev.numa_node, i; 2813 static struct cpumask cpu_mask; 2814 2815 if (numa_node < 0) 2816 return; 2817 2818 if (!cpumask_and(&cpu_mask, cpumask_of_node(numa_node), cpu_online_mask)) { 2819 dev_err(hdev->dev, "No available affinities in current numa node\n"); 2820 return; 2821 } 2822 2823 /* Remove HT siblings */ 2824 for_each_cpu(i, &cpu_mask) 2825 cpumask_set_cpu(cpumask_first(topology_sibling_cpumask(i)), available_mask); 2826 #endif 2827 } 2828 2829 void hl_set_irq_affinity(struct hl_device *hdev, int irq) 2830 { 2831 if (cpumask_empty(&hdev->irq_affinity_mask)) { 2832 dev_dbg(hdev->dev, "affinity mask is empty\n"); 2833 return; 2834 } 2835 2836 if (irq_set_affinity_and_hint(irq, &hdev->irq_affinity_mask)) 2837 dev_err(hdev->dev, "Failed setting irq %d affinity\n", irq); 2838 } 2839