1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2021 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 * 7 */ 8 9 #define pr_fmt(fmt) "habanalabs: " fmt 10 11 #include "habanalabs.h" 12 #include "../include/hw_ip/pci/pci_general.h" 13 14 #include <linux/pci.h> 15 #include <linux/module.h> 16 #include <linux/vmalloc.h> 17 #include <linux/version.h> 18 19 #include <drm/drm_accel.h> 20 #include <drm/drm_drv.h> 21 #include <drm/drm_ioctl.h> 22 23 #define CREATE_TRACE_POINTS 24 #include <trace/events/habanalabs.h> 25 26 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" 27 28 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" 29 30 MODULE_AUTHOR(HL_DRIVER_AUTHOR); 31 MODULE_DESCRIPTION(HL_DRIVER_DESC); 32 MODULE_LICENSE("GPL v2"); 33 34 static int hl_major; 35 static DEFINE_IDR(hl_devs_idr); 36 static DEFINE_MUTEX(hl_devs_idr_lock); 37 38 #define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */ 39 #define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */ 40 41 static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED; 42 static int reset_on_lockup = 1; 43 static int memory_scrub; 44 static ulong boot_error_status_mask = ULONG_MAX; 45 46 module_param(timeout_locked, int, 0444); 47 MODULE_PARM_DESC(timeout_locked, 48 "Device lockup timeout in seconds (0 = disabled, default 30s)"); 49 50 module_param(reset_on_lockup, int, 0444); 51 MODULE_PARM_DESC(reset_on_lockup, 52 "Do device reset on lockup (0 = no, 1 = yes, default yes)"); 53 54 module_param(memory_scrub, int, 0444); 55 MODULE_PARM_DESC(memory_scrub, 56 "Scrub device memory in various states (0 = no, 1 = yes, default no)"); 57 58 module_param(boot_error_status_mask, ulong, 0444); 59 MODULE_PARM_DESC(boot_error_status_mask, 60 "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)"); 61 62 #define PCI_IDS_GOYA 0x0001 63 #define PCI_IDS_GAUDI 0x1000 64 #define PCI_IDS_GAUDI_SEC 0x1010 65 66 #define PCI_IDS_GAUDI2 0x1020 67 68 static const struct pci_device_id ids[] = { 69 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, 70 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, 71 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, 72 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), }, 73 { 0, } 74 }; 75 MODULE_DEVICE_TABLE(pci, ids); 76 77 static const struct drm_ioctl_desc hl_drm_ioctls[] = { 78 DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0), 79 DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0), 80 DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0), 81 DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0), 82 DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0), 83 DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0), 84 }; 85 86 static const struct file_operations hl_fops = { 87 .owner = THIS_MODULE, 88 .open = accel_open, 89 .release = drm_release, 90 .unlocked_ioctl = drm_ioctl, 91 .compat_ioctl = drm_compat_ioctl, 92 .llseek = noop_llseek, 93 .mmap = hl_mmap 94 }; 95 96 static const struct drm_driver hl_driver = { 97 .driver_features = DRIVER_COMPUTE_ACCEL, 98 99 .name = HL_NAME, 100 .desc = HL_DRIVER_DESC, 101 .major = LINUX_VERSION_MAJOR, 102 .minor = LINUX_VERSION_PATCHLEVEL, 103 .patchlevel = LINUX_VERSION_SUBLEVEL, 104 .date = "20190505", 105 106 .fops = &hl_fops, 107 .open = hl_device_open, 108 .postclose = hl_device_release, 109 .ioctls = hl_drm_ioctls, 110 .num_ioctls = ARRAY_SIZE(hl_drm_ioctls) 111 }; 112 113 /* 114 * get_asic_type - translate device id to asic type 115 * 116 * @hdev: pointer to habanalabs device structure. 117 * 118 * Translate device id and revision id to asic type. 119 * In case of unidentified device, return -1 120 */ 121 static enum hl_asic_type get_asic_type(struct hl_device *hdev) 122 { 123 struct pci_dev *pdev = hdev->pdev; 124 enum hl_asic_type asic_type = ASIC_INVALID; 125 126 switch (pdev->device) { 127 case PCI_IDS_GOYA: 128 asic_type = ASIC_GOYA; 129 break; 130 case PCI_IDS_GAUDI: 131 asic_type = ASIC_GAUDI; 132 break; 133 case PCI_IDS_GAUDI_SEC: 134 asic_type = ASIC_GAUDI_SEC; 135 break; 136 case PCI_IDS_GAUDI2: 137 switch (pdev->revision) { 138 case REV_ID_A: 139 asic_type = ASIC_GAUDI2; 140 break; 141 case REV_ID_B: 142 asic_type = ASIC_GAUDI2B; 143 break; 144 case REV_ID_C: 145 asic_type = ASIC_GAUDI2C; 146 break; 147 default: 148 break; 149 } 150 break; 151 default: 152 break; 153 } 154 155 return asic_type; 156 } 157 158 static bool is_asic_secured(enum hl_asic_type asic_type) 159 { 160 switch (asic_type) { 161 case ASIC_GAUDI_SEC: 162 return true; 163 default: 164 return false; 165 } 166 } 167 168 /* 169 * hl_device_open() - open function for habanalabs device. 170 * @ddev: pointer to DRM device structure. 171 * @file: pointer to DRM file private data structure. 172 * 173 * Called when process opens an habanalabs device. 174 */ 175 int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv) 176 { 177 struct hl_device *hdev = to_hl_device(ddev); 178 enum hl_device_status status; 179 struct hl_fpriv *hpriv; 180 int rc; 181 182 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 183 if (!hpriv) 184 return -ENOMEM; 185 186 hpriv->hdev = hdev; 187 mutex_init(&hpriv->notifier_event.lock); 188 mutex_init(&hpriv->restore_phase_mutex); 189 mutex_init(&hpriv->ctx_lock); 190 kref_init(&hpriv->refcount); 191 192 hl_ctx_mgr_init(&hpriv->ctx_mgr); 193 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); 194 195 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); 196 197 mutex_lock(&hdev->fpriv_list_lock); 198 199 if (!hl_device_operational(hdev, &status)) { 200 dev_dbg_ratelimited(hdev->dev, 201 "Can't open %s because it is %s\n", 202 dev_name(hdev->dev), hdev->status[status]); 203 204 if (status == HL_DEVICE_STATUS_IN_RESET || 205 status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE) 206 rc = -EAGAIN; 207 else 208 rc = -EPERM; 209 210 goto out_err; 211 } 212 213 if (hdev->is_in_dram_scrub) { 214 dev_dbg_ratelimited(hdev->dev, 215 "Can't open %s during dram scrub\n", 216 dev_name(hdev->dev)); 217 rc = -EAGAIN; 218 goto out_err; 219 } 220 221 if (hdev->compute_ctx_in_release) { 222 dev_dbg_ratelimited(hdev->dev, 223 "Can't open %s because another user is still releasing it\n", 224 dev_name(hdev->dev)); 225 rc = -EAGAIN; 226 goto out_err; 227 } 228 229 if (hdev->is_compute_ctx_active) { 230 dev_dbg_ratelimited(hdev->dev, 231 "Can't open %s because another user is working on it\n", 232 dev_name(hdev->dev)); 233 rc = -EBUSY; 234 goto out_err; 235 } 236 237 rc = hl_ctx_create(hdev, hpriv); 238 if (rc) { 239 dev_err(hdev->dev, "Failed to create context %d\n", rc); 240 goto out_err; 241 } 242 243 list_add(&hpriv->dev_node, &hdev->fpriv_list); 244 mutex_unlock(&hdev->fpriv_list_lock); 245 246 hdev->asic_funcs->send_device_activity(hdev, true); 247 248 hl_debugfs_add_file(hpriv); 249 250 hl_enable_err_info_capture(&hdev->captured_err_info); 251 252 hdev->open_counter++; 253 hdev->last_successful_open_jif = jiffies; 254 hdev->last_successful_open_ktime = ktime_get(); 255 256 file_priv->driver_priv = hpriv; 257 hpriv->file_priv = file_priv; 258 259 return 0; 260 261 out_err: 262 mutex_unlock(&hdev->fpriv_list_lock); 263 hl_mem_mgr_fini(&hpriv->mem_mgr); 264 hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); 265 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); 266 mutex_destroy(&hpriv->ctx_lock); 267 mutex_destroy(&hpriv->restore_phase_mutex); 268 mutex_destroy(&hpriv->notifier_event.lock); 269 put_pid(hpriv->taskpid); 270 271 kfree(hpriv); 272 273 return rc; 274 } 275 276 int hl_device_open_ctrl(struct inode *inode, struct file *filp) 277 { 278 struct hl_device *hdev; 279 struct hl_fpriv *hpriv; 280 int rc; 281 282 mutex_lock(&hl_devs_idr_lock); 283 hdev = idr_find(&hl_devs_idr, iminor(inode)); 284 mutex_unlock(&hl_devs_idr_lock); 285 286 if (!hdev) { 287 pr_err("Couldn't find device %d:%d\n", 288 imajor(inode), iminor(inode)); 289 return -ENXIO; 290 } 291 292 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); 293 if (!hpriv) 294 return -ENOMEM; 295 296 /* Prevent other routines from reading partial hpriv data by 297 * initializing hpriv fields before inserting it to the list 298 */ 299 hpriv->hdev = hdev; 300 filp->private_data = hpriv; 301 302 nonseekable_open(inode, filp); 303 304 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); 305 306 mutex_lock(&hdev->fpriv_ctrl_list_lock); 307 308 if (!hl_ctrl_device_operational(hdev, NULL)) { 309 dev_dbg_ratelimited(hdev->dev_ctrl, 310 "Can't open %s because it is disabled\n", 311 dev_name(hdev->dev_ctrl)); 312 rc = -EPERM; 313 goto out_err; 314 } 315 316 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list); 317 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 318 319 return 0; 320 321 out_err: 322 mutex_unlock(&hdev->fpriv_ctrl_list_lock); 323 filp->private_data = NULL; 324 put_pid(hpriv->taskpid); 325 326 kfree(hpriv); 327 328 return rc; 329 } 330 331 static void set_driver_behavior_per_device(struct hl_device *hdev) 332 { 333 hdev->nic_ports_mask = 0; 334 hdev->fw_components = FW_TYPE_ALL_TYPES; 335 hdev->cpu_queues_enable = 1; 336 hdev->pldm = 0; 337 hdev->hard_reset_on_fw_events = 1; 338 hdev->bmc_enable = 1; 339 hdev->reset_on_preboot_fail = 1; 340 hdev->heartbeat = 1; 341 } 342 343 static void copy_kernel_module_params_to_device(struct hl_device *hdev) 344 { 345 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); 346 347 hdev->major = hl_major; 348 hdev->memory_scrub = memory_scrub; 349 hdev->reset_on_lockup = reset_on_lockup; 350 hdev->boot_error_status_mask = boot_error_status_mask; 351 } 352 353 static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) 354 { 355 switch (hdev->asic_type) { 356 case ASIC_GAUDI: 357 case ASIC_GAUDI_SEC: 358 /* If user didn't request a different timeout than the default one, we have 359 * a different default timeout for Gaudi 360 */ 361 if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) 362 hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * 363 MSEC_PER_SEC); 364 365 hdev->reset_upon_device_release = 0; 366 break; 367 368 case ASIC_GOYA: 369 hdev->reset_upon_device_release = 0; 370 break; 371 372 default: 373 hdev->reset_upon_device_release = 1; 374 break; 375 } 376 } 377 378 static int fixup_device_params(struct hl_device *hdev) 379 { 380 int tmp_timeout; 381 382 tmp_timeout = timeout_locked; 383 384 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; 385 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; 386 387 if (tmp_timeout) 388 hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); 389 else 390 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; 391 392 hdev->stop_on_err = true; 393 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN; 394 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT; 395 396 /* Enable only after the initialization of the device */ 397 hdev->disabled = true; 398 399 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) && 400 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) { 401 pr_err("Preboot must be set along with other components"); 402 return -EINVAL; 403 } 404 405 /* If CPU queues not enabled, no way to do heartbeat */ 406 if (!hdev->cpu_queues_enable) 407 hdev->heartbeat = 0; 408 fixup_device_params_per_asic(hdev, tmp_timeout); 409 410 return 0; 411 } 412 413 static int allocate_device_id(struct hl_device *hdev) 414 { 415 int id; 416 417 mutex_lock(&hl_devs_idr_lock); 418 id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL); 419 mutex_unlock(&hl_devs_idr_lock); 420 421 if (id < 0) { 422 if (id == -ENOSPC) 423 pr_err("too many devices in the system\n"); 424 return -EBUSY; 425 } 426 427 hdev->id = id; 428 429 /* 430 * Firstly initialized with the internal device ID. 431 * Will be updated later after the DRM device registration to hold the minor ID. 432 */ 433 hdev->cdev_idx = hdev->id; 434 435 return 0; 436 } 437 438 /** 439 * create_hdev - create habanalabs device instance 440 * 441 * @dev: will hold the pointer to the new habanalabs device structure 442 * @pdev: pointer to the pci device 443 * 444 * Allocate memory for habanalabs device and initialize basic fields 445 * Identify the ASIC type 446 * Allocate ID (minor) for the device (only for real devices) 447 */ 448 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) 449 { 450 struct hl_device *hdev; 451 int rc; 452 453 *dev = NULL; 454 455 hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm); 456 if (IS_ERR(hdev)) 457 return PTR_ERR(hdev); 458 459 hdev->dev = hdev->drm.dev; 460 461 /* Will be NULL in case of simulator device */ 462 hdev->pdev = pdev; 463 464 /* Assign status description string */ 465 strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); 466 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); 467 strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); 468 strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); 469 strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], 470 "in device creation", HL_STR_MAX); 471 strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], 472 "in reset after device release", HL_STR_MAX); 473 474 475 /* First, we must find out which ASIC are we handling. This is needed 476 * to configure the behavior of the driver (kernel parameters) 477 */ 478 hdev->asic_type = get_asic_type(hdev); 479 if (hdev->asic_type == ASIC_INVALID) { 480 dev_err(&pdev->dev, "Unsupported ASIC\n"); 481 rc = -ENODEV; 482 goto out_err; 483 } 484 485 copy_kernel_module_params_to_device(hdev); 486 487 set_driver_behavior_per_device(hdev); 488 489 fixup_device_params(hdev); 490 491 rc = allocate_device_id(hdev); 492 if (rc) 493 goto out_err; 494 495 *dev = hdev; 496 497 return 0; 498 499 out_err: 500 return rc; 501 } 502 503 /* 504 * destroy_hdev - destroy habanalabs device instance 505 * 506 * @dev: pointer to the habanalabs device structure 507 * 508 */ 509 static void destroy_hdev(struct hl_device *hdev) 510 { 511 /* Remove device from the device list */ 512 mutex_lock(&hl_devs_idr_lock); 513 idr_remove(&hl_devs_idr, hdev->id); 514 mutex_unlock(&hl_devs_idr_lock); 515 516 } 517 518 static int hl_pmops_suspend(struct device *dev) 519 { 520 struct hl_device *hdev = dev_get_drvdata(dev); 521 522 pr_debug("Going to suspend PCI device\n"); 523 524 if (!hdev) { 525 pr_err("device pointer is NULL in suspend\n"); 526 return 0; 527 } 528 529 return hl_device_suspend(hdev); 530 } 531 532 static int hl_pmops_resume(struct device *dev) 533 { 534 struct hl_device *hdev = dev_get_drvdata(dev); 535 536 pr_debug("Going to resume PCI device\n"); 537 538 if (!hdev) { 539 pr_err("device pointer is NULL in resume\n"); 540 return 0; 541 } 542 543 return hl_device_resume(hdev); 544 } 545 546 /** 547 * hl_pci_probe - probe PCI habanalabs devices 548 * 549 * @pdev: pointer to pci device 550 * @id: pointer to pci device id structure 551 * 552 * Standard PCI probe function for habanalabs device. 553 * Create a new habanalabs device and initialize it according to the 554 * device's type 555 */ 556 static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) 557 { 558 struct hl_device *hdev; 559 int rc; 560 561 dev_info(&pdev->dev, HL_NAME 562 " device found [%04x:%04x] (rev %x)\n", 563 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision); 564 565 rc = create_hdev(&hdev, pdev); 566 if (rc) 567 return rc; 568 569 pci_set_drvdata(pdev, hdev); 570 571 rc = hl_device_init(hdev); 572 if (rc) { 573 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n"); 574 rc = -ENODEV; 575 goto disable_device; 576 } 577 578 return 0; 579 580 disable_device: 581 pci_set_drvdata(pdev, NULL); 582 destroy_hdev(hdev); 583 584 return rc; 585 } 586 587 /* 588 * hl_pci_remove - remove PCI habanalabs devices 589 * 590 * @pdev: pointer to pci device 591 * 592 * Standard PCI remove function for habanalabs device 593 */ 594 static void hl_pci_remove(struct pci_dev *pdev) 595 { 596 struct hl_device *hdev; 597 598 hdev = pci_get_drvdata(pdev); 599 if (!hdev) 600 return; 601 602 hl_device_fini(hdev); 603 pci_set_drvdata(pdev, NULL); 604 destroy_hdev(hdev); 605 } 606 607 /** 608 * hl_pci_err_detected - a PCI bus error detected on this device 609 * 610 * @pdev: pointer to pci device 611 * @state: PCI error type 612 * 613 * Called by the PCI subsystem whenever a non-correctable 614 * PCI bus error is detected 615 */ 616 static pci_ers_result_t 617 hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) 618 { 619 struct hl_device *hdev = pci_get_drvdata(pdev); 620 enum pci_ers_result result; 621 622 switch (state) { 623 case pci_channel_io_normal: 624 dev_warn(hdev->dev, "PCI normal state error detected\n"); 625 return PCI_ERS_RESULT_CAN_RECOVER; 626 627 case pci_channel_io_frozen: 628 dev_warn(hdev->dev, "PCI frozen state error detected\n"); 629 result = PCI_ERS_RESULT_NEED_RESET; 630 break; 631 632 case pci_channel_io_perm_failure: 633 dev_warn(hdev->dev, "PCI failure state error detected\n"); 634 result = PCI_ERS_RESULT_DISCONNECT; 635 break; 636 637 default: 638 result = PCI_ERS_RESULT_NONE; 639 } 640 641 hdev->asic_funcs->halt_engines(hdev, true, false); 642 643 return result; 644 } 645 646 /** 647 * hl_pci_err_resume - resume after a PCI slot reset 648 * 649 * @pdev: pointer to pci device 650 * 651 */ 652 static void hl_pci_err_resume(struct pci_dev *pdev) 653 { 654 struct hl_device *hdev = pci_get_drvdata(pdev); 655 656 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n"); 657 hl_device_resume(hdev); 658 } 659 660 /** 661 * hl_pci_err_slot_reset - a PCI slot reset has just happened 662 * 663 * @pdev: pointer to pci device 664 * 665 * Determine if the driver can recover from the PCI slot reset 666 */ 667 static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev) 668 { 669 struct hl_device *hdev = pci_get_drvdata(pdev); 670 671 dev_warn(hdev->dev, "PCI slot reset detected\n"); 672 673 return PCI_ERS_RESULT_RECOVERED; 674 } 675 676 static void hl_pci_reset_prepare(struct pci_dev *pdev) 677 { 678 struct hl_device *hdev; 679 680 hdev = pci_get_drvdata(pdev); 681 if (!hdev) 682 return; 683 684 hdev->disabled = true; 685 } 686 687 static void hl_pci_reset_done(struct pci_dev *pdev) 688 { 689 struct hl_device *hdev; 690 u32 flags; 691 692 hdev = pci_get_drvdata(pdev); 693 if (!hdev) 694 return; 695 696 /* 697 * Schedule a thread to trigger hard reset. 698 * The reason for this handler, is for rare cases where the driver is up 699 * and FLR occurs. This is valid only when working with no VM, so FW handles FLR 700 * and resets the device. FW will go back preboot stage, so driver needs to perform 701 * hard reset in order to load FW fit again. 702 */ 703 flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW; 704 705 hl_device_reset(hdev, flags); 706 } 707 708 static const struct dev_pm_ops hl_pm_ops = { 709 .suspend = hl_pmops_suspend, 710 .resume = hl_pmops_resume, 711 }; 712 713 static const struct pci_error_handlers hl_pci_err_handler = { 714 .error_detected = hl_pci_err_detected, 715 .slot_reset = hl_pci_err_slot_reset, 716 .resume = hl_pci_err_resume, 717 .reset_prepare = hl_pci_reset_prepare, 718 .reset_done = hl_pci_reset_done, 719 }; 720 721 static struct pci_driver hl_pci_driver = { 722 .name = HL_NAME, 723 .id_table = ids, 724 .probe = hl_pci_probe, 725 .remove = hl_pci_remove, 726 .shutdown = hl_pci_remove, 727 .driver = { 728 .name = HL_NAME, 729 .pm = &hl_pm_ops, 730 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 731 }, 732 .err_handler = &hl_pci_err_handler, 733 }; 734 735 /* 736 * hl_init - Initialize the habanalabs kernel driver 737 */ 738 static int __init hl_init(void) 739 { 740 int rc; 741 dev_t dev; 742 743 pr_info("loading driver\n"); 744 745 rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME); 746 if (rc < 0) { 747 pr_err("unable to get major\n"); 748 return rc; 749 } 750 751 hl_major = MAJOR(dev); 752 753 rc = pci_register_driver(&hl_pci_driver); 754 if (rc) { 755 pr_err("failed to register pci device\n"); 756 goto remove_major; 757 } 758 759 pr_debug("driver loaded\n"); 760 761 return 0; 762 763 remove_major: 764 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 765 return rc; 766 } 767 768 /* 769 * hl_exit - Release all resources of the habanalabs kernel driver 770 */ 771 static void __exit hl_exit(void) 772 { 773 pci_unregister_driver(&hl_pci_driver); 774 775 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); 776 777 idr_destroy(&hl_devs_idr); 778 779 pr_debug("driver removed\n"); 780 } 781 782 module_init(hl_init); 783 module_exit(hl_exit); 784