xref: /linux/drivers/net/ethernet/meta/fbnic/fbnic_pci.c (revision 43dfc13ca972988e620a6edb72956981b75ab6b0)
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) Meta Platforms, Inc. and affiliates. */
3 
4 #include <linux/init.h>
5 #include <linux/module.h>
6 #include <linux/pci.h>
7 #include <linux/rtnetlink.h>
8 #include <linux/types.h>
9 #include <net/devlink.h>
10 
11 #include "fbnic.h"
12 #include "fbnic_drvinfo.h"
13 #include "fbnic_hw_stats.h"
14 #include "fbnic_netdev.h"
15 
16 char fbnic_driver_name[] = DRV_NAME;
17 
18 MODULE_DESCRIPTION(DRV_SUMMARY);
19 MODULE_LICENSE("GPL");
20 
21 static const struct fbnic_info fbnic_asic_info = {
22 	.max_num_queues = FBNIC_MAX_QUEUES,
23 	.bar_mask = BIT(0) | BIT(4)
24 };
25 
26 static const struct fbnic_info *fbnic_info_tbl[] = {
27 	[fbnic_board_asic] = &fbnic_asic_info,
28 };
29 
30 static const struct pci_device_id fbnic_pci_tbl[] = {
31 	{ PCI_DEVICE_DATA(META, FBNIC_ASIC, fbnic_board_asic) },
32 	/* Required last entry */
33 	{0, }
34 };
35 MODULE_DEVICE_TABLE(pci, fbnic_pci_tbl);
36 
37 u32 fbnic_rd32(struct fbnic_dev *fbd, u32 reg)
38 {
39 	u32 __iomem *csr = READ_ONCE(fbd->uc_addr0);
40 	u32 value;
41 
42 	if (!csr)
43 		return ~0U;
44 
45 	value = readl(csr + reg);
46 
47 	/* If any bits are 0 value should be valid */
48 	if (~value)
49 		return value;
50 
51 	/* All 1's may be valid if ZEROs register still works */
52 	if (reg != FBNIC_MASTER_SPARE_0 && ~readl(csr + FBNIC_MASTER_SPARE_0))
53 		return value;
54 
55 	/* Hardware is giving us all 1's reads, assume it is gone */
56 	WRITE_ONCE(fbd->uc_addr0, NULL);
57 	WRITE_ONCE(fbd->uc_addr4, NULL);
58 
59 	dev_err(fbd->dev,
60 		"Failed read (idx 0x%x AKA addr 0x%x), disabled CSR access, awaiting reset\n",
61 		reg, reg << 2);
62 
63 	/* Notify stack that device has lost (PCIe) link */
64 	if (!fbnic_init_failure(fbd))
65 		netif_device_detach(fbd->netdev);
66 
67 	return ~0U;
68 }
69 
70 bool fbnic_fw_present(struct fbnic_dev *fbd)
71 {
72 	return !!READ_ONCE(fbd->uc_addr4);
73 }
74 
75 void fbnic_fw_wr32(struct fbnic_dev *fbd, u32 reg, u32 val)
76 {
77 	u32 __iomem *csr = READ_ONCE(fbd->uc_addr4);
78 
79 	if (csr)
80 		writel(val, csr + reg);
81 }
82 
83 u32 fbnic_fw_rd32(struct fbnic_dev *fbd, u32 reg)
84 {
85 	u32 __iomem *csr = READ_ONCE(fbd->uc_addr4);
86 	u32 value;
87 
88 	if (!csr)
89 		return ~0U;
90 
91 	value = readl(csr + reg);
92 
93 	/* If any bits are 0 value should be valid */
94 	if (~value)
95 		return value;
96 
97 	/* All 1's may be valid if ZEROs register still works */
98 	if (reg != FBNIC_FW_ZERO_REG && ~readl(csr + FBNIC_FW_ZERO_REG))
99 		return value;
100 
101 	/* Hardware is giving us all 1's reads, assume it is gone */
102 	WRITE_ONCE(fbd->uc_addr0, NULL);
103 	WRITE_ONCE(fbd->uc_addr4, NULL);
104 
105 	dev_err(fbd->dev,
106 		"Failed read (idx 0x%x AKA addr 0x%x), disabled CSR access, awaiting reset\n",
107 		reg, reg << 2);
108 
109 	/* Notify stack that device has lost (PCIe) link */
110 	if (!fbnic_init_failure(fbd))
111 		netif_device_detach(fbd->netdev);
112 
113 	return ~0U;
114 }
115 
116 static void fbnic_service_task_start(struct fbnic_net *fbn)
117 {
118 	struct fbnic_dev *fbd = fbn->fbd;
119 
120 	schedule_delayed_work(&fbd->service_task, HZ);
121 }
122 
123 static void fbnic_service_task_stop(struct fbnic_net *fbn)
124 {
125 	struct fbnic_dev *fbd = fbn->fbd;
126 
127 	cancel_delayed_work(&fbd->service_task);
128 }
129 
130 void fbnic_up(struct fbnic_net *fbn)
131 {
132 	fbnic_enable(fbn);
133 
134 	fbnic_fill(fbn);
135 
136 	fbnic_rss_reinit_hw(fbn->fbd, fbn);
137 
138 	__fbnic_set_rx_mode(fbn->fbd);
139 
140 	/* Enable Tx/Rx processing */
141 	fbnic_napi_enable(fbn);
142 	netif_tx_start_all_queues(fbn->netdev);
143 
144 	fbnic_service_task_start(fbn);
145 }
146 
147 void fbnic_down_noidle(struct fbnic_net *fbn)
148 {
149 	fbnic_service_task_stop(fbn);
150 
151 	/* Disable Tx/Rx Processing */
152 	fbnic_napi_disable(fbn);
153 	netif_tx_disable(fbn->netdev);
154 
155 	fbnic_clear_rx_mode(fbn->fbd);
156 	fbnic_clear_rules(fbn->fbd);
157 	fbnic_rss_disable_hw(fbn->fbd);
158 	fbnic_disable(fbn);
159 }
160 
161 void fbnic_down(struct fbnic_net *fbn)
162 {
163 	fbnic_down_noidle(fbn);
164 
165 	fbnic_wait_all_queues_idle(fbn->fbd, false);
166 
167 	fbnic_flush(fbn);
168 }
169 
170 static int fbnic_fw_config_after_crash(struct fbnic_dev *fbd)
171 {
172 	if (fbnic_fw_xmit_ownership_msg(fbd, true)) {
173 		dev_err(fbd->dev, "NIC failed to take ownership\n");
174 
175 		return -1;
176 	}
177 
178 	fbnic_rpc_reset_valid_entries(fbd);
179 	__fbnic_set_rx_mode(fbd);
180 
181 	return 0;
182 }
183 
184 static void fbnic_health_check(struct fbnic_dev *fbd)
185 {
186 	struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX];
187 
188 	/* As long as the heart is beating the FW is healthy */
189 	if (fbd->fw_heartbeat_enabled)
190 		return;
191 
192 	/* If the Tx mailbox still has messages sitting in it then there likely
193 	 * isn't anything we can do. We will wait until the mailbox is empty to
194 	 * report the fault so we can collect the crashlog.
195 	 */
196 	if (tx_mbx->head != tx_mbx->tail)
197 		return;
198 
199 	fbnic_devlink_fw_report(fbd, "Firmware crash detected!");
200 	fbnic_devlink_otp_check(fbd, "error detected after firmware recovery");
201 
202 	if (fbnic_fw_config_after_crash(fbd))
203 		dev_err(fbd->dev, "Firmware recovery failed after crash\n");
204 }
205 
206 static void fbnic_service_task(struct work_struct *work)
207 {
208 	struct fbnic_dev *fbd = container_of(to_delayed_work(work),
209 					     struct fbnic_dev, service_task);
210 	struct net_device *netdev = fbd->netdev;
211 
212 	if (netif_running(netdev))
213 		fbnic_phylink_pmd_training_complete_notify(netdev);
214 
215 	rtnl_lock();
216 
217 	fbnic_get_hw_stats32(fbd);
218 
219 	fbnic_fw_check_heartbeat(fbd);
220 
221 	fbnic_health_check(fbd);
222 
223 	fbnic_bmc_rpc_check(fbd);
224 
225 	if (netif_carrier_ok(fbd->netdev)) {
226 		netdev_lock(fbd->netdev);
227 		fbnic_napi_depletion_check(fbd->netdev);
228 		netdev_unlock(fbd->netdev);
229 	}
230 
231 	if (netif_running(netdev))
232 		schedule_delayed_work(&fbd->service_task, HZ);
233 
234 	rtnl_unlock();
235 }
236 
237 /**
238  * fbnic_probe - Device Initialization Routine
239  * @pdev: PCI device information struct
240  * @ent: entry in fbnic_pci_tbl
241  *
242  * Initializes a PCI device identified by a pci_dev structure.
243  * The OS initialization, configuring of the adapter private structure,
244  * and a hardware reset occur.
245  *
246  * Return: 0 on success, negative on failure
247  **/
248 static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
249 {
250 	const struct fbnic_info *info = fbnic_info_tbl[ent->driver_data];
251 	struct net_device *netdev;
252 	struct fbnic_dev *fbd;
253 	int err;
254 
255 	if (pdev->error_state != pci_channel_io_normal) {
256 		dev_err(&pdev->dev,
257 			"PCI device still in an error state. Unable to load...\n");
258 		return -EIO;
259 	}
260 
261 	err = pcim_enable_device(pdev);
262 	if (err) {
263 		dev_err(&pdev->dev, "PCI enable device failed: %d\n", err);
264 		return err;
265 	}
266 
267 	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(46));
268 	if (err)
269 		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
270 	if (err) {
271 		dev_err(&pdev->dev, "DMA configuration failed: %d\n", err);
272 		return err;
273 	}
274 
275 	err = pcim_iomap_regions(pdev, info->bar_mask, fbnic_driver_name);
276 	if (err) {
277 		dev_err(&pdev->dev,
278 			"pci_request_selected_regions failed: %d\n", err);
279 		return err;
280 	}
281 
282 	fbd = fbnic_devlink_alloc(pdev);
283 	if (!fbd) {
284 		dev_err(&pdev->dev, "Devlink allocation failed\n");
285 		return -ENOMEM;
286 	}
287 
288 	err = fbnic_devlink_health_create(fbd);
289 	if (err)
290 		goto free_fbd;
291 
292 	/* Populate driver with hardware-specific info and handlers */
293 	fbd->max_num_queues = info->max_num_queues;
294 
295 	pci_set_master(pdev);
296 	pci_save_state(pdev);
297 
298 	INIT_DELAYED_WORK(&fbd->service_task, fbnic_service_task);
299 
300 	err = fbnic_alloc_irqs(fbd);
301 	if (err)
302 		goto err_destroy_health;
303 
304 	err = fbnic_mac_init(fbd);
305 	if (err) {
306 		dev_err(&pdev->dev, "Failed to initialize MAC: %d\n", err);
307 		goto free_irqs;
308 	}
309 
310 	err = fbnic_fw_request_mbx(fbd);
311 	if (err) {
312 		dev_err(&pdev->dev,
313 			"Firmware mailbox initialization failure\n");
314 		goto free_irqs;
315 	}
316 
317 	/* Send the request to enable the FW logging to host. Note if this
318 	 * fails we ignore the error and just display a message as it is
319 	 * possible the FW is just too old to support the logging and needs
320 	 * to be updated.
321 	 */
322 	err = fbnic_fw_log_init(fbd);
323 	if (err)
324 		dev_warn(fbd->dev,
325 			 "Unable to initialize firmware log buffer: %d\n",
326 			 err);
327 
328 	fbnic_devlink_register(fbd);
329 	fbnic_devlink_otp_check(fbd, "error detected during probe");
330 	fbnic_dbg_fbd_init(fbd);
331 
332 	/* Capture snapshot of hardware stats so netdev can calculate delta */
333 	fbnic_init_hw_stats(fbd);
334 
335 	fbnic_hwmon_register(fbd);
336 
337 	if (!fbd->dsn) {
338 		dev_warn(&pdev->dev, "Reading serial number failed\n");
339 		goto init_failure_mode;
340 	}
341 
342 	if (fbnic_mdiobus_create(fbd))
343 		goto init_failure_mode;
344 
345 	netdev = fbnic_netdev_alloc(fbd);
346 	if (!netdev) {
347 		dev_err(&pdev->dev, "Netdev allocation failed\n");
348 		goto init_failure_mode;
349 	}
350 
351 	err = fbnic_ptp_setup(fbd);
352 	if (err)
353 		goto ifm_free_netdev;
354 
355 	err = fbnic_netdev_register(netdev);
356 	if (err) {
357 		dev_err(&pdev->dev, "Netdev registration failed: %d\n", err);
358 		goto ifm_destroy_ptp;
359 	}
360 
361 	return 0;
362 
363 ifm_destroy_ptp:
364 	fbnic_ptp_destroy(fbd);
365 ifm_free_netdev:
366 	fbnic_netdev_free(fbd);
367 init_failure_mode:
368 	dev_warn(&pdev->dev, "Probe error encountered, entering init failure mode. Normal networking functionality will not be available.\n");
369 	 /* Always return 0 even on error so devlink is registered to allow
370 	  * firmware updates for fixes.
371 	  */
372 	return 0;
373 free_irqs:
374 	fbnic_free_irqs(fbd);
375 err_destroy_health:
376 	fbnic_devlink_health_destroy(fbd);
377 free_fbd:
378 	fbnic_devlink_free(fbd);
379 
380 	return err;
381 }
382 
383 /**
384  * fbnic_remove - Device Removal Routine
385  * @pdev: PCI device information struct
386  *
387  * Called by the PCI subsystem to alert the driver that it should release
388  * a PCI device.  This could be caused by a Hot-Plug event, or because the
389  * driver is going to be removed from memory.
390  **/
391 static void fbnic_remove(struct pci_dev *pdev)
392 {
393 	struct fbnic_dev *fbd = pci_get_drvdata(pdev);
394 
395 	if (!fbnic_init_failure(fbd)) {
396 		struct net_device *netdev = fbd->netdev;
397 
398 		fbnic_netdev_unregister(netdev);
399 		cancel_delayed_work_sync(&fbd->service_task);
400 		fbnic_ptp_destroy(fbd);
401 		fbnic_netdev_free(fbd);
402 	}
403 
404 	fbnic_hwmon_unregister(fbd);
405 	fbnic_dbg_fbd_exit(fbd);
406 	fbnic_devlink_unregister(fbd);
407 	fbnic_fw_log_free(fbd);
408 	fbnic_fw_free_mbx(fbd);
409 	fbnic_free_irqs(fbd);
410 
411 	fbnic_devlink_health_destroy(fbd);
412 	fbnic_devlink_free(fbd);
413 }
414 
415 static int fbnic_pm_suspend(struct device *dev)
416 {
417 	struct fbnic_dev *fbd = dev_get_drvdata(dev);
418 	struct net_device *netdev = fbd->netdev;
419 
420 	if (fbnic_init_failure(fbd))
421 		goto null_uc_addr;
422 
423 	rtnl_lock();
424 	netdev_lock(netdev);
425 
426 	netif_device_detach(netdev);
427 
428 	if (netif_running(netdev))
429 		netdev->netdev_ops->ndo_stop(netdev);
430 
431 	netdev_unlock(netdev);
432 	rtnl_unlock();
433 
434 null_uc_addr:
435 	fbnic_fw_log_disable(fbd);
436 
437 	devl_lock(priv_to_devlink(fbd));
438 
439 	fbnic_fw_free_mbx(fbd);
440 
441 	devl_unlock(priv_to_devlink(fbd));
442 
443 	/* Free the IRQs so they aren't trying to occupy sleeping CPUs */
444 	fbnic_free_irqs(fbd);
445 
446 	/* Hardware is about to go away, so switch off MMIO access internally */
447 	WRITE_ONCE(fbd->uc_addr0, NULL);
448 	WRITE_ONCE(fbd->uc_addr4, NULL);
449 
450 	return 0;
451 }
452 
453 static int __fbnic_pm_resume(struct device *dev)
454 {
455 	struct fbnic_dev *fbd = dev_get_drvdata(dev);
456 	struct net_device *netdev = fbd->netdev;
457 	void __iomem * const *iomap_table;
458 	struct fbnic_net *fbn;
459 	int err;
460 
461 	/* Restore MMIO access */
462 	iomap_table = pcim_iomap_table(to_pci_dev(dev));
463 	fbd->uc_addr0 = iomap_table[0];
464 	fbd->uc_addr4 = iomap_table[4];
465 
466 	/* Rerequest the IRQs */
467 	err = fbnic_alloc_irqs(fbd);
468 	if (err)
469 		goto err_invalidate_uc_addr;
470 
471 	fbd->mac->init_regs(fbd);
472 
473 	devl_lock(priv_to_devlink(fbd));
474 
475 	/* Re-enable mailbox */
476 	err = fbnic_fw_request_mbx(fbd);
477 	devl_unlock(priv_to_devlink(fbd));
478 	if (err)
479 		goto err_free_irqs;
480 
481 	/* Only send log history if log buffer is empty to prevent duplicate
482 	 * log entries.
483 	 */
484 	fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries));
485 
486 	/* Since the FW should be up, check if it reported OTP errors */
487 	fbnic_devlink_otp_check(fbd, "error detected after PM resume");
488 
489 	/* No netdev means there isn't a network interface to bring up */
490 	if (fbnic_init_failure(fbd))
491 		return 0;
492 
493 	fbn = netdev_priv(netdev);
494 
495 	/* Reset the queues if needed */
496 	fbnic_reset_queues(fbn, fbn->num_tx_queues, fbn->num_rx_queues);
497 
498 	rtnl_lock();
499 	netdev_lock(netdev);
500 
501 	if (netif_running(netdev))
502 		err = __fbnic_open(fbn);
503 
504 	netdev_unlock(netdev);
505 	rtnl_unlock();
506 	if (err)
507 		goto err_free_mbx;
508 
509 	return 0;
510 err_free_mbx:
511 	fbnic_fw_log_disable(fbd);
512 
513 	devl_lock(priv_to_devlink(fbd));
514 	fbnic_fw_free_mbx(fbd);
515 	devl_unlock(priv_to_devlink(fbd));
516 err_free_irqs:
517 	fbnic_free_irqs(fbd);
518 err_invalidate_uc_addr:
519 	WRITE_ONCE(fbd->uc_addr0, NULL);
520 	WRITE_ONCE(fbd->uc_addr4, NULL);
521 	return err;
522 }
523 
524 static void __fbnic_pm_attach(struct device *dev)
525 {
526 	struct fbnic_dev *fbd = dev_get_drvdata(dev);
527 	struct net_device *netdev = fbd->netdev;
528 	struct fbnic_net *fbn;
529 
530 	rtnl_lock();
531 	fbnic_reset_hw_stats(fbd);
532 	rtnl_unlock();
533 
534 	if (fbnic_init_failure(fbd))
535 		return;
536 
537 	fbn = netdev_priv(netdev);
538 
539 	if (netif_running(netdev))
540 		fbnic_up(fbn);
541 
542 	netif_device_attach(netdev);
543 }
544 
545 static int __maybe_unused fbnic_pm_resume(struct device *dev)
546 {
547 	int err;
548 
549 	err = __fbnic_pm_resume(dev);
550 	if (!err)
551 		__fbnic_pm_attach(dev);
552 
553 	return err;
554 }
555 
556 static const struct dev_pm_ops fbnic_pm_ops = {
557 	SET_SYSTEM_SLEEP_PM_OPS(fbnic_pm_suspend, fbnic_pm_resume)
558 };
559 
560 static void fbnic_shutdown(struct pci_dev *pdev)
561 {
562 	fbnic_pm_suspend(&pdev->dev);
563 }
564 
565 static pci_ers_result_t fbnic_err_error_detected(struct pci_dev *pdev,
566 						 pci_channel_state_t state)
567 {
568 	/* Disconnect device if failure is not recoverable via reset */
569 	if (state == pci_channel_io_perm_failure)
570 		return PCI_ERS_RESULT_DISCONNECT;
571 
572 	fbnic_pm_suspend(&pdev->dev);
573 
574 	/* Request a slot reset */
575 	return PCI_ERS_RESULT_NEED_RESET;
576 }
577 
578 static pci_ers_result_t fbnic_err_slot_reset(struct pci_dev *pdev)
579 {
580 	int err;
581 
582 	pci_set_power_state(pdev, PCI_D0);
583 	pci_restore_state(pdev);
584 
585 	if (pci_enable_device_mem(pdev)) {
586 		dev_err(&pdev->dev,
587 			"Cannot re-enable PCI device after reset.\n");
588 		return PCI_ERS_RESULT_DISCONNECT;
589 	}
590 
591 	/* Restore device to previous state */
592 	err = __fbnic_pm_resume(&pdev->dev);
593 
594 	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
595 }
596 
597 static void fbnic_err_resume(struct pci_dev *pdev)
598 {
599 	__fbnic_pm_attach(&pdev->dev);
600 }
601 
602 static const struct pci_error_handlers fbnic_err_handler = {
603 	.error_detected	= fbnic_err_error_detected,
604 	.slot_reset	= fbnic_err_slot_reset,
605 	.resume		= fbnic_err_resume,
606 };
607 
608 static struct pci_driver fbnic_driver = {
609 	.name		= fbnic_driver_name,
610 	.id_table	= fbnic_pci_tbl,
611 	.probe		= fbnic_probe,
612 	.remove		= fbnic_remove,
613 	.driver.pm	= &fbnic_pm_ops,
614 	.shutdown	= fbnic_shutdown,
615 	.err_handler	= &fbnic_err_handler,
616 };
617 
618 /**
619  * fbnic_init_module - Driver Registration Routine
620  *
621  * The first routine called when the driver is loaded.  All it does is
622  * register with the PCI subsystem.
623  *
624  * Return: 0 on success, negative on failure
625  **/
626 static int __init fbnic_init_module(void)
627 {
628 	int err;
629 
630 	fbnic_dbg_init();
631 
632 	err = pci_register_driver(&fbnic_driver);
633 	if (err) {
634 		fbnic_dbg_exit();
635 		goto out;
636 	}
637 
638 	pr_info(DRV_SUMMARY " (%s)", fbnic_driver.name);
639 out:
640 	return err;
641 }
642 module_init(fbnic_init_module);
643 
644 /**
645  * fbnic_exit_module - Driver Exit Cleanup Routine
646  *
647  * Called just before the driver is removed from memory.
648  **/
649 static void __exit fbnic_exit_module(void)
650 {
651 	pci_unregister_driver(&fbnic_driver);
652 
653 	fbnic_dbg_exit();
654 }
655 module_exit(fbnic_exit_module);
656