1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include "regs/xe_hw_error_regs.h" 7 #include "regs/xe_irq_regs.h" 8 9 #include "xe_device.h" 10 #include "xe_hw_error.h" 11 #include "xe_mmio.h" 12 13 /* Error categories reported by hardware */ 14 enum hardware_error { 15 HARDWARE_ERROR_CORRECTABLE = 0, 16 HARDWARE_ERROR_NONFATAL = 1, 17 HARDWARE_ERROR_FATAL = 2, 18 HARDWARE_ERROR_MAX, 19 }; 20 21 static const char *hw_error_to_str(const enum hardware_error hw_err) 22 { 23 switch (hw_err) { 24 case HARDWARE_ERROR_CORRECTABLE: 25 return "CORRECTABLE"; 26 case HARDWARE_ERROR_NONFATAL: 27 return "NONFATAL"; 28 case HARDWARE_ERROR_FATAL: 29 return "FATAL"; 30 default: 31 return "UNKNOWN"; 32 } 33 } 34 35 static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_error hw_err) 36 { 37 const char *hw_err_str = hw_error_to_str(hw_err); 38 struct xe_device *xe = tile_to_xe(tile); 39 unsigned long flags; 40 u32 err_src; 41 42 if (xe->info.platform != XE_BATTLEMAGE) 43 return; 44 45 spin_lock_irqsave(&xe->irq.lock, flags); 46 err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err)); 47 if (!err_src) { 48 drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported DEV_ERR_STAT_%s blank!\n", 49 tile->id, hw_err_str); 50 goto unlock; 51 } 52 53 /* TODO: Process errrors per source */ 54 55 xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src); 56 57 unlock: 58 spin_unlock_irqrestore(&xe->irq.lock, flags); 59 } 60 61 /** 62 * xe_hw_error_irq_handler - irq handling for hw errors 63 * @tile: tile instance 64 * @master_ctl: value read from master interrupt register 65 * 66 * Xe platforms add three error bits to the master interrupt register to support error handling. 67 * These three bits are used to convey the class of error FATAL, NONFATAL, or CORRECTABLE. 68 * To process the interrupt, determine the source of error by reading the Device Error Source 69 * Register that corresponds to the class of error being serviced. 70 */ 71 void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl) 72 { 73 enum hardware_error hw_err; 74 75 for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++) 76 if (master_ctl & ERROR_IRQ(hw_err)) 77 hw_error_source_handler(tile, hw_err); 78 } 79 80 /* 81 * Process hardware errors during boot 82 */ 83 static void process_hw_errors(struct xe_device *xe) 84 { 85 struct xe_tile *tile; 86 u32 master_ctl; 87 u8 id; 88 89 for_each_tile(tile, xe, id) { 90 master_ctl = xe_mmio_read32(&tile->mmio, GFX_MSTR_IRQ); 91 xe_hw_error_irq_handler(tile, master_ctl); 92 xe_mmio_write32(&tile->mmio, GFX_MSTR_IRQ, master_ctl); 93 } 94 } 95 96 /** 97 * xe_hw_error_init - Initialize hw errors 98 * @xe: xe device instance 99 * 100 * Initialize and check for errors that occurred during boot 101 * prior to driver load 102 */ 103 void xe_hw_error_init(struct xe_device *xe) 104 { 105 if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) 106 return; 107 108 process_hw_errors(xe); 109 } 110