Lines Matching +full:wait +full:- +full:monitoring +full:- +full:ns
1 // SPDX-License-Identifier: GPL-2.0-only
8 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
14 * Module initialization and PCIe setup. Card health monitoring and
24 #include <linux/wait.h>
26 #include <linux/dma-mapping.h>
37 MODULE_AUTHOR("Joerg-Stephan Vogt <jsvogt@de.ibm.com>");
59 /* Initial SR-IOV bring-up image */
108 * genwqe_devnode() - Set default access mode for genwqe devices.
110 * @mode: Carrier to pass-back given mode (permissions)
128 * genwqe_dev_alloc() - Create and prepare a new card descriptor
142 return ERR_PTR(-ENODEV); in genwqe_dev_alloc()
146 return ERR_PTR(-ENOMEM); in genwqe_dev_alloc()
148 cd->card_idx = i; in genwqe_dev_alloc()
149 cd->class_genwqe = &class_genwqe; in genwqe_dev_alloc()
150 cd->debugfs_genwqe = debugfs_genwqe; in genwqe_dev_alloc()
156 cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY; in genwqe_dev_alloc()
158 init_waitqueue_head(&cd->queue_waitq); in genwqe_dev_alloc()
160 spin_lock_init(&cd->file_lock); in genwqe_dev_alloc()
161 INIT_LIST_HEAD(&cd->file_list); in genwqe_dev_alloc()
163 cd->card_state = GENWQE_CARD_UNUSED; in genwqe_dev_alloc()
164 spin_lock_init(&cd->print_lock); in genwqe_dev_alloc()
166 cd->ddcb_software_timeout = GENWQE_DDCB_SOFTWARE_TIMEOUT; in genwqe_dev_alloc()
167 cd->kill_timeout = GENWQE_KILL_TIMEOUT; in genwqe_dev_alloc()
170 cd->vf_jobtimeout_msec[j] = GENWQE_VF_JOBTIMEOUT_MSEC; in genwqe_dev_alloc()
181 genwqe_devices[cd->card_idx] = NULL; in genwqe_dev_free()
186 * genwqe_bus_reset() - Card recovery
197 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_bus_reset()
200 if (cd->err_inject & GENWQE_INJECT_BUS_RESET_FAILURE) in genwqe_bus_reset()
201 return -EIO; in genwqe_bus_reset()
203 mmio = cd->mmio; in genwqe_bus_reset()
204 cd->mmio = NULL; in genwqe_bus_reset()
211 * Settings like enable bus-mastering, ... are backed up and in genwqe_bus_reset()
214 dev_dbg(&pci_dev->dev, "[%s] pci_reset function ...\n", __func__); in genwqe_bus_reset()
217 dev_err(&pci_dev->dev, in genwqe_bus_reset()
221 dev_dbg(&pci_dev->dev, "[%s] done with rc=%d\n", __func__, rc); in genwqe_bus_reset()
227 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | in genwqe_bus_reset()
233 dev_err(&pci_dev->dev, in genwqe_bus_reset()
235 return -EIO; in genwqe_bus_reset()
238 cd->mmio = pci_iomap(pci_dev, 0, 0); in genwqe_bus_reset()
239 if (cd->mmio == NULL) { in genwqe_bus_reset()
240 dev_err(&pci_dev->dev, in genwqe_bus_reset()
242 return -ENOMEM; in genwqe_bus_reset()
248 * Hardware circumvention section. Certain bitstreams in our test-lab
261 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; in genwqe_need_err_masking()
266 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_tweak_hardware()
269 if (((cd->slu_unitcfg & 0xFFFF0ull) >= 0x32000ull) && in genwqe_tweak_hardware()
270 ((cd->slu_unitcfg & 0xFFFF0ull) <= 0x33250ull)) { in genwqe_tweak_hardware()
271 dev_warn(&pci_dev->dev, in genwqe_tweak_hardware()
273 cd->slu_unitcfg, cd->app_unitcfg); in genwqe_tweak_hardware()
284 * genwqe_recovery_on_fatal_gfir_required() - Version dependent actions
287 * Bitstreams older than 2013-02-17 have a bug where fatal GFIRs must
290 * test-lab.
294 return (cd->slu_unitcfg & 0xFFFF0ull) >= 0x32170ull; in genwqe_recovery_on_fatal_gfir_required()
299 return (cd->slu_unitcfg & 0xFFFF0ull) < 0x32170ull; in genwqe_flash_readback_fails()
303 * genwqe_T_psec() - Calculate PF/VF timeout register content
316 u16 speed; /* 1/f -> 250, 200, 166, 175 */ in genwqe_T_psec()
319 speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); in genwqe_T_psec()
321 return -1; /* illegal value */ in genwqe_T_psec()
327 * genwqe_setup_pf_jtimer() - Setup PF hardware timeouts for DDCB execution
333 * The max. timeout value is 2^(10+x) * T (6ns for 166MHz) * 15/16.
334 * The min. timeout value is 2^(10+x) * T (6ns for 166MHz) * 14/16.
346 16000000000uL/(T * 15)) - 10; in genwqe_setup_pf_jtimer()
354 * genwqe_setup_vf_jtimer() - Setup VF hardware timeouts for DDCB execution
359 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_setup_vf_jtimer()
371 if (cd->vf_jobtimeout_msec[vf] == 0) in genwqe_setup_vf_jtimer()
374 x = ilog2(cd->vf_jobtimeout_msec[vf] * in genwqe_setup_vf_jtimer()
375 16000000000uL/(T * 15)) - 10; in genwqe_setup_vf_jtimer()
404 cd->ffdc[type].entries = e; in genwqe_ffdc_buffs_alloc()
405 cd->ffdc[type].regs = in genwqe_ffdc_buffs_alloc()
421 kfree(cd->ffdc[type].regs); in genwqe_ffdc_buffs_free()
422 cd->ffdc[type].regs = NULL; in genwqe_ffdc_buffs_free()
430 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_read_ids()
432 cd->slu_unitcfg = __genwqe_readq(cd, IO_SLU_UNITCFG); in genwqe_read_ids()
433 if (cd->slu_unitcfg == IO_ILLEGAL_VALUE) { in genwqe_read_ids()
434 dev_err(&pci_dev->dev, in genwqe_read_ids()
435 "err: SLUID=%016llx\n", cd->slu_unitcfg); in genwqe_read_ids()
436 err = -EIO; in genwqe_read_ids()
442 dev_err(&pci_dev->dev, in genwqe_read_ids()
444 err = -ENOENT; in genwqe_read_ids()
448 cd->app_unitcfg = __genwqe_readq(cd, IO_APP_UNITCFG); in genwqe_read_ids()
449 if (cd->app_unitcfg == IO_ILLEGAL_VALUE) { in genwqe_read_ids()
450 dev_err(&pci_dev->dev, in genwqe_read_ids()
451 "err: APPID=%016llx\n", cd->app_unitcfg); in genwqe_read_ids()
452 err = -EIO; in genwqe_read_ids()
455 genwqe_read_app_id(cd, cd->app_name, sizeof(cd->app_name)); in genwqe_read_ids()
464 if (pci_dev->is_virtfn) in genwqe_read_ids()
465 cd->is_privileged = 0; in genwqe_read_ids()
467 cd->is_privileged = (__genwqe_readq(cd, IO_SLU_BITSTREAM) in genwqe_read_ids()
477 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_start()
489 genwqe_read_ffdc_regs(cd, cd->ffdc[GENWQE_DBG_REGS].regs, in genwqe_start()
490 cd->ffdc[GENWQE_DBG_REGS].entries, 0); in genwqe_start()
493 cd->ffdc[GENWQE_DBG_UNIT0].regs, in genwqe_start()
494 cd->ffdc[GENWQE_DBG_UNIT0].entries); in genwqe_start()
497 cd->ffdc[GENWQE_DBG_UNIT1].regs, in genwqe_start()
498 cd->ffdc[GENWQE_DBG_UNIT1].entries); in genwqe_start()
501 cd->ffdc[GENWQE_DBG_UNIT2].regs, in genwqe_start()
502 cd->ffdc[GENWQE_DBG_UNIT2].entries); in genwqe_start()
506 if (cd->card_state == GENWQE_CARD_FATAL_ERROR) { in genwqe_start()
507 dev_warn(&pci_dev->dev, in genwqe_start()
514 cd->softreset = 0x7Cull; in genwqe_start()
516 cd->softreset); in genwqe_start()
520 dev_err(&pci_dev->dev, in genwqe_start()
527 * Re-read the IDs because in genwqe_start()
539 dev_err(&pci_dev->dev, in genwqe_start()
541 err = -ENODEV; in genwqe_start()
554 dev_err(&pci_dev->dev, in genwqe_start()
565 return -EIO; in genwqe_start()
569 * genwqe_stop() - Stop card operation
588 pci_disable_sriov(cd->pci_dev); /* access pci config space */ in genwqe_stop()
596 * genwqe_recover_card() - Try to recover the card if it is possible
610 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_recover_card()
619 cd->softreset = 0x70ull; in genwqe_recover_card()
620 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, cd->softreset); in genwqe_recover_card()
625 dev_err(&pci_dev->dev, in genwqe_recover_card()
632 dev_err(&pci_dev->dev, in genwqe_recover_card()
647 * genwqe_fir_checking() - Check the fault isolation registers of the card
661 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_fir_checking()
666 dev_err(&pci_dev->dev, "* exit looping after %d times\n", in genwqe_fir_checking()
673 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", in genwqe_fir_checking()
697 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fir_addr, fir); in genwqe_fir_checking()
705 dev_err(&pci_dev->dev, "* 0x%08x 0x%016llx\n", fec_addr, fec); in genwqe_fir_checking()
720 dev_err(&pci_dev->dev, in genwqe_fir_checking()
728 dev_err(&pci_dev->dev, in genwqe_fir_checking()
749 dev_dbg(&pci_dev->dev, in genwqe_fir_checking()
754 * note, these cannot be error-Firs in genwqe_fir_checking()
766 dev_dbg(&pci_dev->dev, in genwqe_fir_checking()
781 dev_dbg(&pci_dev->dev, "ACK! Another FIR! Recursing %d!\n", in genwqe_fir_checking()
792 * genwqe_pci_fundamental_reset() - trigger a PCIe fundamental reset on the slot
815 /* Wait for 2s to reload flash and train the link */ in genwqe_pci_fundamental_reset()
826 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_platform_recovery()
829 dev_info(&pci_dev->dev, in genwqe_platform_recovery()
833 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE | in genwqe_platform_recovery()
844 dev_info(&pci_dev->dev, in genwqe_platform_recovery()
847 dev_err(&pci_dev->dev, in genwqe_platform_recovery()
851 dev_err(&pci_dev->dev, in genwqe_platform_recovery()
859 * genwqe_reload_bistream() - reload card bitstream
869 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_reload_bistream()
872 dev_info(&pci_dev->dev, in genwqe_reload_bistream()
883 (cd->softreset & 0xcull) | 0x70ull); in genwqe_reload_bistream()
893 dev_err(&pci_dev->dev, in genwqe_reload_bistream()
900 dev_err(&pci_dev->dev, in genwqe_reload_bistream()
905 dev_info(&pci_dev->dev, in genwqe_reload_bistream()
912 * genwqe_health_thread() - Health checking thread
918 * is when we read registers which contain -1 (IO_ILLEGAL_VALUE). In
924 * Condition for the health-thread to trigger:
935 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_health_thread()
940 rc = wait_event_interruptible_timeout(cd->health_waitq, in genwqe_health_thread()
949 dev_err(&pci_dev->dev, in genwqe_health_thread()
956 dev_err(&pci_dev->dev, in genwqe_health_thread()
964 dev_err(&pci_dev->dev, in genwqe_health_thread()
972 dev_err(&pci_dev->dev, in genwqe_health_thread()
987 if ((gfir_masked) && !cd->skip_recovery && in genwqe_health_thread()
990 cd->card_state = GENWQE_CARD_FATAL_ERROR; in genwqe_health_thread()
999 if (cd->card_state == GENWQE_CARD_RELOAD_BITSTREAM) { in genwqe_health_thread()
1006 cd->last_gfir = gfir; in genwqe_health_thread()
1013 if (cd->use_platform_recovery) { in genwqe_health_thread()
1016 * by the platform until we do a non-raw MMIO or config space in genwqe_health_thread()
1019 readq(cd->mmio + IO_SLC_CFGREG_GFIR); in genwqe_health_thread()
1023 return -EIO; in genwqe_health_thread()
1027 * to recover from a fatal error. Otherwise, we continue to wait in genwqe_health_thread()
1035 dev_err(&pci_dev->dev, in genwqe_health_thread()
1039 cd->card_state = GENWQE_CARD_FATAL_ERROR; in genwqe_health_thread()
1042 /* genwqe_bus_reset() failed. Now wait for genwqe_remove(). */ in genwqe_health_thread()
1046 return -EIO; in genwqe_health_thread()
1057 /* init_waitqueue_head(&cd->health_waitq); */ in genwqe_health_check_start()
1059 cd->health_thread = kthread_run(genwqe_health_thread, cd, in genwqe_health_check_start()
1061 cd->card_idx); in genwqe_health_check_start()
1062 if (IS_ERR(cd->health_thread)) { in genwqe_health_check_start()
1063 rc = PTR_ERR(cd->health_thread); in genwqe_health_check_start()
1064 cd->health_thread = NULL; in genwqe_health_check_start()
1072 return cd->health_thread != NULL; in genwqe_health_thread_running()
1078 return -EIO; in genwqe_health_check_stop()
1080 kthread_stop(cd->health_thread); in genwqe_health_check_stop()
1081 cd->health_thread = NULL; in genwqe_health_check_stop()
1086 * genwqe_pci_setup() - Allocate PCIe related resources for our card
1092 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_pci_setup()
1096 dev_err(&pci_dev->dev, in genwqe_pci_setup()
1104 dev_err(&pci_dev->dev, in genwqe_pci_setup()
1106 err = -EIO; in genwqe_pci_setup()
1110 /* check for 64-bit DMA address supported (DAC) */ in genwqe_pci_setup()
1111 /* check for 32-bit DMA address supported (SAC) */ in genwqe_pci_setup()
1112 if (dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)) && in genwqe_pci_setup()
1113 dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32))) { in genwqe_pci_setup()
1114 dev_err(&pci_dev->dev, in genwqe_pci_setup()
1116 err = -EIO; in genwqe_pci_setup()
1123 pci_dev->needs_freset = 1; in genwqe_pci_setup()
1125 /* request complete BAR-0 space (length = 0) */ in genwqe_pci_setup()
1126 cd->mmio_len = pci_resource_len(pci_dev, 0); in genwqe_pci_setup()
1127 cd->mmio = pci_iomap(pci_dev, 0, 0); in genwqe_pci_setup()
1128 if (cd->mmio == NULL) { in genwqe_pci_setup()
1129 dev_err(&pci_dev->dev, in genwqe_pci_setup()
1131 err = -ENOMEM; in genwqe_pci_setup()
1135 cd->num_vfs = pci_sriov_get_totalvfs(pci_dev); in genwqe_pci_setup()
1136 if (cd->num_vfs < 0) in genwqe_pci_setup()
1137 cd->num_vfs = 0; in genwqe_pci_setup()
1146 pci_iounmap(pci_dev, cd->mmio); in genwqe_pci_setup()
1156 * genwqe_pci_remove() - Free PCIe related resources for our card
1161 struct pci_dev *pci_dev = cd->pci_dev; in genwqe_pci_remove()
1163 if (cd->mmio) in genwqe_pci_remove()
1164 pci_iounmap(pci_dev, cd->mmio); in genwqe_pci_remove()
1171 * genwqe_probe() - Device initialization
1189 dev_err(&pci_dev->dev, "err: could not alloc mem (err=%d)!\n", in genwqe_probe()
1194 dev_set_drvdata(&pci_dev->dev, cd); in genwqe_probe()
1195 cd->pci_dev = pci_dev; in genwqe_probe()
1199 dev_err(&pci_dev->dev, in genwqe_probe()
1206 dev_err(&pci_dev->dev, in genwqe_probe()
1214 dev_err(&pci_dev->dev, in genwqe_probe()
1232 * genwqe_remove() - Called when device is removed (hot-pluggable)
1239 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); in genwqe_remove()
1254 * genwqe_err_error_detected() - Error detection callback
1266 dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); in genwqe_err_error_detected()
1268 cd = dev_get_drvdata(&pci_dev->dev); in genwqe_err_error_detected()
1292 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); in genwqe_err_slot_reset()
1298 dev_err(&pci_dev->dev, in genwqe_err_slot_reset()
1312 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); in genwqe_err_resume()
1318 dev_err(&pci_dev->dev, in genwqe_err_resume()
1322 dev_err(&pci_dev->dev, in genwqe_err_resume()
1330 struct genwqe_dev *cd = dev_get_drvdata(&dev->dev); in genwqe_sriov_configure()
1363 * genwqe_init_module() - Driver registration and initialization
1372 return -ENOMEM; in genwqe_init_module()
1392 * genwqe_exit_module() - Driver exit