1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
3
4 #include <linux/init.h>
5 #include <linux/module.h>
6 #include <linux/pci.h>
7 #include <linux/rtnetlink.h>
8 #include <linux/types.h>
9 #include <net/devlink.h>
10
11 #include "fbnic.h"
12 #include "fbnic_drvinfo.h"
13 #include "fbnic_hw_stats.h"
14 #include "fbnic_netdev.h"
15
16 char fbnic_driver_name[] = DRV_NAME;
17
18 MODULE_DESCRIPTION(DRV_SUMMARY);
19 MODULE_LICENSE("GPL");
20
21 static const struct fbnic_info fbnic_asic_info = {
22 .max_num_queues = FBNIC_MAX_QUEUES,
23 .bar_mask = BIT(0) | BIT(4)
24 };
25
26 static const struct fbnic_info *fbnic_info_tbl[] = {
27 [fbnic_board_asic] = &fbnic_asic_info,
28 };
29
30 static const struct pci_device_id fbnic_pci_tbl[] = {
31 { PCI_DEVICE_DATA(META, FBNIC_ASIC, fbnic_board_asic) },
32 /* Required last entry */
33 {0, }
34 };
35 MODULE_DEVICE_TABLE(pci, fbnic_pci_tbl);
36
/**
 * fbnic_rd32 - Read a 32-bit CSR through the uc_addr0 mapping
 * @fbd: device to read from
 * @reg: register index in 32-bit words (byte offset is @reg << 2)
 *
 * An all-ones result is cross-checked against FBNIC_MASTER_SPARE_0. If that
 * register also reads all ones (or it is the register being read), the
 * hardware is assumed to be gone: both CSR mappings are cleared so later
 * accesses are skipped, the failure is logged, and the netdev is detached
 * unless the device is in init failure mode.
 *
 * Return: register contents, or ~0U when CSR access is (or just got) disabled
 **/
u32 fbnic_rd32(struct fbnic_dev *fbd, u32 reg)
{
	u32 __iomem *bar = READ_ONCE(fbd->uc_addr0);
	u32 data;

	/* CSR access has already been switched off */
	if (!bar)
		return ~0U;

	data = readl(bar + reg);

	/* Any cleared bit means the read actually reached the device */
	if (data != ~0U)
		return data;

	/* All 1's is still trusted while the spare register reads sanely */
	if (reg != FBNIC_MASTER_SPARE_0 &&
	    readl(bar + FBNIC_MASTER_SPARE_0) != ~0U)
		return data;

	/* Nothing but all 1's is coming back, treat the hardware as gone */
	WRITE_ONCE(fbd->uc_addr0, NULL);
	WRITE_ONCE(fbd->uc_addr4, NULL);

	dev_err(fbd->dev,
		"Failed read (idx 0x%x AKA addr 0x%x), disabled CSR access, awaiting reset\n",
		reg, reg << 2);

	/* Let the stack know the device has lost its (PCIe) link */
	if (!fbnic_init_failure(fbd))
		netif_device_detach(fbd->netdev);

	return ~0U;
}
69
fbnic_fw_present(struct fbnic_dev * fbd)70 bool fbnic_fw_present(struct fbnic_dev *fbd)
71 {
72 return !!READ_ONCE(fbd->uc_addr4);
73 }
74
/**
 * fbnic_fw_wr32 - Write a 32-bit CSR through the uc_addr4 mapping
 * @fbd: device to write to
 * @reg: register index in 32-bit words
 * @val: value to write
 *
 * The write is silently dropped when the mapping has been cleared.
 **/
void fbnic_fw_wr32(struct fbnic_dev *fbd, u32 reg, u32 val)
{
	u32 __iomem *bar = READ_ONCE(fbd->uc_addr4);

	if (!bar)
		return;

	writel(val, bar + reg);
}
82
/**
 * fbnic_fw_rd32 - Read a 32-bit CSR through the uc_addr4 mapping
 * @fbd: device to read from
 * @reg: register index in 32-bit words (byte offset is @reg << 2)
 *
 * An all-ones result is cross-checked against FBNIC_FW_ZERO_REG. If that
 * register also reads all ones (or it is the register being read), the
 * hardware is assumed to be gone: both CSR mappings are cleared so later
 * accesses are skipped, the failure is logged, and the netdev is detached
 * unless the device is in init failure mode.
 *
 * Return: register contents, or ~0U when CSR access is (or just got) disabled
 **/
u32 fbnic_fw_rd32(struct fbnic_dev *fbd, u32 reg)
{
	u32 __iomem *bar = READ_ONCE(fbd->uc_addr4);
	u32 data;

	/* CSR access has already been switched off */
	if (!bar)
		return ~0U;

	data = readl(bar + reg);

	/* Any cleared bit means the read actually reached the device */
	if (data != ~0U)
		return data;

	/* All 1's is still trusted while the ZEROs register reads sanely */
	if (reg != FBNIC_FW_ZERO_REG &&
	    readl(bar + FBNIC_FW_ZERO_REG) != ~0U)
		return data;

	/* Nothing but all 1's is coming back, treat the hardware as gone */
	WRITE_ONCE(fbd->uc_addr0, NULL);
	WRITE_ONCE(fbd->uc_addr4, NULL);

	dev_err(fbd->dev,
		"Failed read (idx 0x%x AKA addr 0x%x), disabled CSR access, awaiting reset\n",
		reg, reg << 2);

	/* Let the stack know the device has lost its (PCIe) link */
	if (!fbnic_init_failure(fbd))
		netif_device_detach(fbd->netdev);

	return ~0U;
}
115
fbnic_service_task_start(struct fbnic_net * fbn)116 static void fbnic_service_task_start(struct fbnic_net *fbn)
117 {
118 struct fbnic_dev *fbd = fbn->fbd;
119
120 schedule_delayed_work(&fbd->service_task, HZ);
121 }
122
fbnic_service_task_stop(struct fbnic_net * fbn)123 static void fbnic_service_task_stop(struct fbnic_net *fbn)
124 {
125 struct fbnic_dev *fbd = fbn->fbd;
126
127 cancel_delayed_work(&fbd->service_task);
128 }
129
fbnic_up(struct fbnic_net * fbn)130 void fbnic_up(struct fbnic_net *fbn)
131 {
132 fbnic_enable(fbn);
133
134 fbnic_fill(fbn);
135
136 fbnic_rss_reinit_hw(fbn->fbd, fbn);
137
138 __fbnic_set_rx_mode(fbn->fbd);
139
140 /* Enable Tx/Rx processing */
141 fbnic_napi_enable(fbn);
142 netif_tx_start_all_queues(fbn->netdev);
143
144 fbnic_service_task_start(fbn);
145
146 fbnic_dbg_up(fbn);
147 }
148
fbnic_down_noidle(struct fbnic_net * fbn)149 void fbnic_down_noidle(struct fbnic_net *fbn)
150 {
151 fbnic_dbg_down(fbn);
152
153 fbnic_service_task_stop(fbn);
154
155 /* Disable Tx/Rx Processing */
156 fbnic_napi_disable(fbn);
157 netif_tx_disable(fbn->netdev);
158
159 fbnic_clear_rx_mode(fbn->fbd);
160 fbnic_clear_rules(fbn->fbd);
161 fbnic_rss_disable_hw(fbn->fbd);
162 fbnic_disable(fbn);
163 }
164
fbnic_down(struct fbnic_net * fbn)165 void fbnic_down(struct fbnic_net *fbn)
166 {
167 fbnic_down_noidle(fbn);
168
169 fbnic_wait_all_queues_idle(fbn->fbd, false);
170
171 fbnic_flush(fbn);
172 }
173
fbnic_fw_config_after_crash(struct fbnic_dev * fbd)174 static int fbnic_fw_config_after_crash(struct fbnic_dev *fbd)
175 {
176 if (fbnic_fw_xmit_ownership_msg(fbd, true)) {
177 dev_err(fbd->dev, "NIC failed to take ownership\n");
178
179 return -1;
180 }
181
182 fbnic_rpc_reset_valid_entries(fbd);
183 __fbnic_set_rx_mode(fbd);
184
185 return 0;
186 }
187
fbnic_health_check(struct fbnic_dev * fbd)188 static void fbnic_health_check(struct fbnic_dev *fbd)
189 {
190 struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX];
191
192 /* As long as the heart is beating the FW is healthy */
193 if (fbd->fw_heartbeat_enabled)
194 return;
195
196 /* If the Tx mailbox still has messages sitting in it then there likely
197 * isn't anything we can do. We will wait until the mailbox is empty to
198 * report the fault so we can collect the crashlog.
199 */
200 if (tx_mbx->head != tx_mbx->tail)
201 return;
202
203 fbnic_devlink_fw_report(fbd, "Firmware crash detected!");
204 fbnic_devlink_otp_check(fbd, "error detected after firmware recovery");
205
206 if (fbnic_fw_config_after_crash(fbd))
207 dev_err(fbd->dev, "Firmware recovery failed after crash\n");
208 }
209
fbnic_service_task(struct work_struct * work)210 static void fbnic_service_task(struct work_struct *work)
211 {
212 struct fbnic_dev *fbd = container_of(to_delayed_work(work),
213 struct fbnic_dev, service_task);
214 struct net_device *netdev = fbd->netdev;
215
216 if (netif_running(netdev))
217 fbnic_phylink_pmd_training_complete_notify(netdev);
218
219 rtnl_lock();
220
221 fbnic_get_hw_stats32(fbd);
222
223 fbnic_fw_check_heartbeat(fbd);
224
225 fbnic_health_check(fbd);
226
227 fbnic_bmc_rpc_check(fbd);
228
229 if (netif_carrier_ok(fbd->netdev)) {
230 netdev_lock(fbd->netdev);
231 fbnic_napi_depletion_check(fbd->netdev);
232 netdev_unlock(fbd->netdev);
233 }
234
235 if (netif_running(netdev))
236 schedule_delayed_work(&fbd->service_task, HZ);
237
238 rtnl_unlock();
239 }
240
241 /**
242 * fbnic_probe - Device Initialization Routine
243 * @pdev: PCI device information struct
244 * @ent: entry in fbnic_pci_tbl
245 *
246 * Initializes a PCI device identified by a pci_dev structure.
247 * The OS initialization, configuring of the adapter private structure,
248 * and a hardware reset occur.
249 *
250 * Return: 0 on success, negative on failure
251 **/
fbnic_probe(struct pci_dev * pdev,const struct pci_device_id * ent)252 static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
253 {
254 const struct fbnic_info *info = fbnic_info_tbl[ent->driver_data];
255 struct net_device *netdev;
256 struct fbnic_dev *fbd;
257 int err;
258
259 if (pdev->error_state != pci_channel_io_normal) {
260 dev_err(&pdev->dev,
261 "PCI device still in an error state. Unable to load...\n");
262 return -EIO;
263 }
264
265 err = pcim_enable_device(pdev);
266 if (err) {
267 dev_err(&pdev->dev, "PCI enable device failed: %d\n", err);
268 return err;
269 }
270
271 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(46));
272 if (err)
273 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
274 if (err) {
275 dev_err(&pdev->dev, "DMA configuration failed: %d\n", err);
276 return err;
277 }
278
279 err = pcim_iomap_regions(pdev, info->bar_mask, fbnic_driver_name);
280 if (err) {
281 dev_err(&pdev->dev,
282 "pci_request_selected_regions failed: %d\n", err);
283 return err;
284 }
285
286 fbd = fbnic_devlink_alloc(pdev);
287 if (!fbd) {
288 dev_err(&pdev->dev, "Devlink allocation failed\n");
289 return -ENOMEM;
290 }
291
292 err = fbnic_devlink_health_create(fbd);
293 if (err)
294 goto free_fbd;
295
296 /* Populate driver with hardware-specific info and handlers */
297 fbd->max_num_queues = info->max_num_queues;
298
299 pci_set_master(pdev);
300 pci_save_state(pdev);
301
302 INIT_DELAYED_WORK(&fbd->service_task, fbnic_service_task);
303
304 err = fbnic_alloc_irqs(fbd);
305 if (err)
306 goto err_destroy_health;
307
308 err = fbnic_mac_init(fbd);
309 if (err) {
310 dev_err(&pdev->dev, "Failed to initialize MAC: %d\n", err);
311 goto free_irqs;
312 }
313
314 err = fbnic_fw_log_init(fbd);
315 if (err)
316 dev_warn(fbd->dev,
317 "Unable to initialize firmware log buffer: %d\n",
318 err);
319
320 err = fbnic_fw_request_mbx(fbd);
321 if (err) {
322 dev_err(&pdev->dev,
323 "Firmware mailbox initialization failure\n");
324 goto free_fw_log;
325 }
326
327 /* Send the request to enable the FW logging to host. Note if this
328 * fails we ignore the error and just display a message as it is
329 * possible the FW is just too old to support the logging and needs
330 * to be updated.
331 */
332 fbnic_fw_log_enable(fbd, true);
333
334 fbnic_devlink_register(fbd);
335 fbnic_devlink_otp_check(fbd, "error detected during probe");
336 fbnic_dbg_fbd_init(fbd);
337
338 /* Capture snapshot of hardware stats so netdev can calculate delta */
339 fbnic_init_hw_stats(fbd);
340
341 fbnic_hwmon_register(fbd);
342
343 if (!fbd->dsn) {
344 dev_warn(&pdev->dev, "Reading serial number failed\n");
345 goto init_failure_mode;
346 }
347
348 if (fbnic_mdiobus_create(fbd))
349 goto init_failure_mode;
350
351 netdev = fbnic_netdev_alloc(fbd);
352 if (!netdev) {
353 dev_err(&pdev->dev, "Netdev allocation failed\n");
354 goto init_failure_mode;
355 }
356
357 err = fbnic_ptp_setup(fbd);
358 if (err)
359 goto ifm_free_netdev;
360
361 err = fbnic_netdev_register(netdev);
362 if (err) {
363 dev_err(&pdev->dev, "Netdev registration failed: %d\n", err);
364 goto ifm_destroy_ptp;
365 }
366
367 return 0;
368
369 ifm_destroy_ptp:
370 fbnic_ptp_destroy(fbd);
371 ifm_free_netdev:
372 fbnic_netdev_free(fbd);
373 init_failure_mode:
374 dev_warn(&pdev->dev, "Probe error encountered, entering init failure mode. Normal networking functionality will not be available.\n");
375 /* Always return 0 even on error so devlink is registered to allow
376 * firmware updates for fixes.
377 */
378 return 0;
379 free_fw_log:
380 fbnic_fw_log_free(fbd);
381 free_irqs:
382 fbnic_free_irqs(fbd);
383 err_destroy_health:
384 fbnic_devlink_health_destroy(fbd);
385 free_fbd:
386 fbnic_devlink_free(fbd);
387
388 return err;
389 }
390
391 /**
392 * fbnic_remove - Device Removal Routine
393 * @pdev: PCI device information struct
394 *
395 * Called by the PCI subsystem to alert the driver that it should release
396 * a PCI device. This could be caused by a Hot-Plug event, or because the
397 * driver is going to be removed from memory.
398 **/
fbnic_remove(struct pci_dev * pdev)399 static void fbnic_remove(struct pci_dev *pdev)
400 {
401 struct fbnic_dev *fbd = pci_get_drvdata(pdev);
402
403 if (!fbnic_init_failure(fbd)) {
404 struct net_device *netdev = fbd->netdev;
405
406 fbnic_netdev_unregister(netdev);
407 cancel_delayed_work_sync(&fbd->service_task);
408 fbnic_ptp_destroy(fbd);
409 fbnic_netdev_free(fbd);
410 }
411
412 fbnic_hwmon_unregister(fbd);
413 fbnic_dbg_fbd_exit(fbd);
414 fbnic_devlink_unregister(fbd);
415 fbnic_fw_log_disable(fbd);
416 fbnic_fw_free_mbx(fbd);
417 fbnic_fw_log_free(fbd);
418 fbnic_free_irqs(fbd);
419
420 fbnic_devlink_health_destroy(fbd);
421 fbnic_devlink_free(fbd);
422 }
423
fbnic_pm_suspend(struct device * dev)424 static int fbnic_pm_suspend(struct device *dev)
425 {
426 struct fbnic_dev *fbd = dev_get_drvdata(dev);
427 struct net_device *netdev = fbd->netdev;
428
429 if (fbnic_init_failure(fbd))
430 goto null_uc_addr;
431
432 rtnl_lock();
433 netdev_lock(netdev);
434
435 netif_device_detach(netdev);
436
437 if (netif_running(netdev))
438 netdev->netdev_ops->ndo_stop(netdev);
439
440 netdev_unlock(netdev);
441 rtnl_unlock();
442
443 null_uc_addr:
444 fbnic_fw_log_disable(fbd);
445
446 devl_lock(priv_to_devlink(fbd));
447
448 fbnic_fw_free_mbx(fbd);
449
450 devl_unlock(priv_to_devlink(fbd));
451
452 /* Free the IRQs so they aren't trying to occupy sleeping CPUs */
453 fbnic_free_irqs(fbd);
454
455 /* Hardware is about to go away, so switch off MMIO access internally */
456 WRITE_ONCE(fbd->uc_addr0, NULL);
457 WRITE_ONCE(fbd->uc_addr4, NULL);
458
459 return 0;
460 }
461
__fbnic_pm_resume(struct device * dev)462 static int __fbnic_pm_resume(struct device *dev)
463 {
464 struct fbnic_dev *fbd = dev_get_drvdata(dev);
465 struct net_device *netdev = fbd->netdev;
466 void __iomem * const *iomap_table;
467 struct fbnic_net *fbn;
468 int err;
469
470 /* Restore MMIO access */
471 iomap_table = pcim_iomap_table(to_pci_dev(dev));
472 fbd->uc_addr0 = iomap_table[0];
473 fbd->uc_addr4 = iomap_table[4];
474
475 /* Rerequest the IRQs */
476 err = fbnic_alloc_irqs(fbd);
477 if (err)
478 goto err_invalidate_uc_addr;
479
480 fbd->mac->init_regs(fbd);
481
482 devl_lock(priv_to_devlink(fbd));
483
484 /* Re-enable mailbox */
485 err = fbnic_fw_request_mbx(fbd);
486 devl_unlock(priv_to_devlink(fbd));
487 if (err)
488 goto err_free_irqs;
489
490 /* Only send log history if log buffer is empty to prevent duplicate
491 * log entries.
492 */
493 fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries));
494
495 /* Since the FW should be up, check if it reported OTP errors */
496 fbnic_devlink_otp_check(fbd, "error detected after PM resume");
497
498 /* No netdev means there isn't a network interface to bring up */
499 if (fbnic_init_failure(fbd))
500 return 0;
501
502 fbn = netdev_priv(netdev);
503
504 /* Reset the queues if needed */
505 fbnic_reset_queues(fbn, fbn->num_tx_queues, fbn->num_rx_queues);
506
507 rtnl_lock();
508 netdev_lock(netdev);
509
510 if (netif_running(netdev))
511 err = __fbnic_open(fbn);
512
513 netdev_unlock(netdev);
514 rtnl_unlock();
515 if (err)
516 goto err_free_mbx;
517
518 return 0;
519 err_free_mbx:
520 fbnic_fw_log_disable(fbd);
521
522 devl_lock(priv_to_devlink(fbd));
523 fbnic_fw_free_mbx(fbd);
524 devl_unlock(priv_to_devlink(fbd));
525 err_free_irqs:
526 fbnic_free_irqs(fbd);
527 err_invalidate_uc_addr:
528 WRITE_ONCE(fbd->uc_addr0, NULL);
529 WRITE_ONCE(fbd->uc_addr4, NULL);
530 return err;
531 }
532
__fbnic_pm_attach(struct device * dev)533 static void __fbnic_pm_attach(struct device *dev)
534 {
535 struct fbnic_dev *fbd = dev_get_drvdata(dev);
536 struct net_device *netdev = fbd->netdev;
537 struct fbnic_net *fbn;
538
539 rtnl_lock();
540 fbnic_reset_hw_stats(fbd);
541 rtnl_unlock();
542
543 if (fbnic_init_failure(fbd))
544 return;
545
546 fbn = netdev_priv(netdev);
547
548 if (netif_running(netdev))
549 fbnic_up(fbn);
550
551 netif_device_attach(netdev);
552 }
553
fbnic_pm_resume(struct device * dev)554 static int __maybe_unused fbnic_pm_resume(struct device *dev)
555 {
556 int err;
557
558 err = __fbnic_pm_resume(dev);
559 if (!err)
560 __fbnic_pm_attach(dev);
561
562 return err;
563 }
564
565 static const struct dev_pm_ops fbnic_pm_ops = {
566 SET_SYSTEM_SLEEP_PM_OPS(fbnic_pm_suspend, fbnic_pm_resume)
567 };
568
fbnic_shutdown(struct pci_dev * pdev)569 static void fbnic_shutdown(struct pci_dev *pdev)
570 {
571 fbnic_pm_suspend(&pdev->dev);
572 }
573
fbnic_err_error_detected(struct pci_dev * pdev,pci_channel_state_t state)574 static pci_ers_result_t fbnic_err_error_detected(struct pci_dev *pdev,
575 pci_channel_state_t state)
576 {
577 /* Disconnect device if failure is not recoverable via reset */
578 if (state == pci_channel_io_perm_failure)
579 return PCI_ERS_RESULT_DISCONNECT;
580
581 fbnic_pm_suspend(&pdev->dev);
582
583 /* Request a slot reset */
584 return PCI_ERS_RESULT_NEED_RESET;
585 }
586
fbnic_err_slot_reset(struct pci_dev * pdev)587 static pci_ers_result_t fbnic_err_slot_reset(struct pci_dev *pdev)
588 {
589 int err;
590
591 pci_set_power_state(pdev, PCI_D0);
592 pci_restore_state(pdev);
593
594 if (pci_enable_device_mem(pdev)) {
595 dev_err(&pdev->dev,
596 "Cannot re-enable PCI device after reset.\n");
597 return PCI_ERS_RESULT_DISCONNECT;
598 }
599
600 /* Restore device to previous state */
601 err = __fbnic_pm_resume(&pdev->dev);
602
603 return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
604 }
605
fbnic_err_resume(struct pci_dev * pdev)606 static void fbnic_err_resume(struct pci_dev *pdev)
607 {
608 __fbnic_pm_attach(&pdev->dev);
609 }
610
611 static const struct pci_error_handlers fbnic_err_handler = {
612 .error_detected = fbnic_err_error_detected,
613 .slot_reset = fbnic_err_slot_reset,
614 .resume = fbnic_err_resume,
615 };
616
617 static struct pci_driver fbnic_driver = {
618 .name = fbnic_driver_name,
619 .id_table = fbnic_pci_tbl,
620 .probe = fbnic_probe,
621 .remove = fbnic_remove,
622 .driver.pm = &fbnic_pm_ops,
623 .shutdown = fbnic_shutdown,
624 .err_handler = &fbnic_err_handler,
625 };
626
627 /**
628 * fbnic_init_module - Driver Registration Routine
629 *
630 * The first routine called when the driver is loaded. All it does is
631 * register with the PCI subsystem.
632 *
633 * Return: 0 on success, negative on failure
634 **/
fbnic_init_module(void)635 static int __init fbnic_init_module(void)
636 {
637 int err;
638
639 fbnic_dbg_init();
640
641 err = pci_register_driver(&fbnic_driver);
642 if (err) {
643 fbnic_dbg_exit();
644 goto out;
645 }
646
647 pr_info(DRV_SUMMARY " (%s)", fbnic_driver.name);
648 out:
649 return err;
650 }
651 module_init(fbnic_init_module);
652
653 /**
654 * fbnic_exit_module - Driver Exit Cleanup Routine
655 *
656 * Called just before the driver is removed from memory.
657 **/
fbnic_exit_module(void)658 static void __exit fbnic_exit_module(void)
659 {
660 pci_unregister_driver(&fbnic_driver);
661
662 fbnic_dbg_exit();
663 }
664 module_exit(fbnic_exit_module);
665