1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2025 Intel Corporation 4 */ 5 6 #include <linux/bitmap.h> 7 #include <linux/fault-inject.h> 8 9 #include "regs/xe_gsc_regs.h" 10 #include "regs/xe_hw_error_regs.h" 11 #include "regs/xe_irq_regs.h" 12 13 #include "xe_device.h" 14 #include "xe_drm_ras.h" 15 #include "xe_hw_error.h" 16 #include "xe_mmio.h" 17 #include "xe_survivability_mode.h" 18 19 #define GT_HW_ERROR_MAX_ERR_BITS 16 20 #define HEC_UNCORR_FW_ERR_BITS 4 21 #define XE_RAS_REG_SIZE 32 22 #define XE_SOC_NUM_IEH 2 23 24 #define PVC_ERROR_MASK_SET(hw_err, err_bit) ((hw_err == HARDWARE_ERROR_CORRECTABLE) ? \ 25 (PVC_COR_ERR_MASK & REG_BIT(err_bit)) : \ 26 (PVC_FAT_ERR_MASK & REG_BIT(err_bit))) 27 28 extern struct fault_attr inject_csc_hw_error; 29 30 static const char * const error_severity[] = DRM_XE_RAS_ERROR_SEVERITY_NAMES; 31 32 static const char * const hec_uncorrected_fw_errors[] = { 33 "Fatal", 34 "CSE Disabled", 35 "FD Corruption", 36 "Data Corruption" 37 }; 38 39 enum gt_vector_regs { 40 ERR_STAT_GT_VECTOR0 = 0, 41 ERR_STAT_GT_VECTOR1, 42 ERR_STAT_GT_VECTOR2, 43 ERR_STAT_GT_VECTOR3, 44 ERR_STAT_GT_VECTOR4, 45 ERR_STAT_GT_VECTOR5, 46 ERR_STAT_GT_VECTOR6, 47 ERR_STAT_GT_VECTOR7, 48 ERR_STAT_GT_VECTOR_MAX 49 }; 50 51 #define PVC_GT_VECTOR_LEN(hw_err) ((hw_err == HARDWARE_ERROR_CORRECTABLE) ? \ 52 ERR_STAT_GT_VECTOR4 : ERR_STAT_GT_VECTOR_MAX) 53 54 static enum drm_xe_ras_error_severity hw_err_to_severity(const enum hardware_error hw_err) 55 { 56 if (hw_err == HARDWARE_ERROR_CORRECTABLE) 57 return DRM_XE_RAS_ERR_SEV_CORRECTABLE; 58 59 /* Uncorrectable errors comprise of both fatal and non-fatal errors */ 60 return DRM_XE_RAS_ERR_SEV_UNCORRECTABLE; 61 } 62 63 static inline u32 err_src_to_id(u32 err_bit) 64 { 65 switch (err_bit) { 66 case XE_GT_ERROR: 67 return DRM_XE_RAS_ERR_COMP_CORE_COMPUTE; 68 case XE_SOC_ERROR: 69 return DRM_XE_RAS_ERR_COMP_SOC_INTERNAL; 70 default: 71 return 0; 72 } 73 } 74 75 static const char * const pvc_master_global_err_reg[] = { 76 [0 ... 1] = "Undefined", 77 [2] = "HBM SS0: Channel0", 78 [3] = "HBM SS0: Channel1", 79 [4] = "HBM SS0: Channel2", 80 [5] = "HBM SS0: Channel3", 81 [6] = "HBM SS0: Channel4", 82 [7] = "HBM SS0: Channel5", 83 [8] = "HBM SS0: Channel6", 84 [9] = "HBM SS0: Channel7", 85 [10] = "HBM SS1: Channel0", 86 [11] = "HBM SS1: Channel1", 87 [12] = "HBM SS1: Channel2", 88 [13] = "HBM SS1: Channel3", 89 [14] = "HBM SS1: Channel4", 90 [15] = "HBM SS1: Channel5", 91 [16] = "HBM SS1: Channel6", 92 [17] = "HBM SS1: Channel7", 93 [18 ... 31] = "Undefined", 94 }; 95 static_assert(ARRAY_SIZE(pvc_master_global_err_reg) == XE_RAS_REG_SIZE); 96 97 static const char * const pvc_slave_global_err_reg[] = { 98 [0] = "Undefined", 99 [1] = "HBM SS2: Channel0", 100 [2] = "HBM SS2: Channel1", 101 [3] = "HBM SS2: Channel2", 102 [4] = "HBM SS2: Channel3", 103 [5] = "HBM SS2: Channel4", 104 [6] = "HBM SS2: Channel5", 105 [7] = "HBM SS2: Channel6", 106 [8] = "HBM SS2: Channel7", 107 [9] = "HBM SS3: Channel0", 108 [10] = "HBM SS3: Channel1", 109 [11] = "HBM SS3: Channel2", 110 [12] = "HBM SS3: Channel3", 111 [13] = "HBM SS3: Channel4", 112 [14] = "HBM SS3: Channel5", 113 [15] = "HBM SS3: Channel6", 114 [16] = "HBM SS3: Channel7", 115 [17] = "Undefined", 116 [18] = "ANR MDFI", 117 [19 ... 31] = "Undefined", 118 }; 119 static_assert(ARRAY_SIZE(pvc_slave_global_err_reg) == XE_RAS_REG_SIZE); 120 121 static const char * const pvc_slave_local_fatal_err_reg[] = { 122 [0] = "Local IEH: Malformed PCIe AER", 123 [1] = "Local IEH: Malformed PCIe ERR", 124 [2] = "Local IEH: UR conditions in IEH", 125 [3] = "Local IEH: From SERR Sources", 126 [4 ... 19] = "Undefined", 127 [20] = "Malformed MCA error packet (HBM/Punit)", 128 [21 ... 31] = "Undefined", 129 }; 130 static_assert(ARRAY_SIZE(pvc_slave_local_fatal_err_reg) == XE_RAS_REG_SIZE); 131 132 static const char * const pvc_master_local_fatal_err_reg[] = { 133 [0] = "Local IEH: Malformed IOSF PCIe AER", 134 [1] = "Local IEH: Malformed IOSF PCIe ERR", 135 [2] = "Local IEH: UR RESPONSE", 136 [3] = "Local IEH: From SERR SPI controller", 137 [4] = "Base Die MDFI T2T", 138 [5] = "Undefined", 139 [6] = "Base Die MDFI T2C", 140 [7] = "Undefined", 141 [8] = "Invalid CSC PSF Command Parity", 142 [9] = "Invalid CSC PSF Unexpected Completion", 143 [10] = "Invalid CSC PSF Unsupported Request", 144 [11] = "Invalid PCIe PSF Command Parity", 145 [12] = "PCIe PSF Unexpected Completion", 146 [13] = "PCIe PSF Unsupported Request", 147 [14 ... 19] = "Undefined", 148 [20] = "Malformed MCA error packet (HBM/Punit)", 149 [21 ... 31] = "Undefined", 150 }; 151 static_assert(ARRAY_SIZE(pvc_master_local_fatal_err_reg) == XE_RAS_REG_SIZE); 152 153 static const char * const pvc_master_local_nonfatal_err_reg[] = { 154 [0 ... 3] = "Undefined", 155 [4] = "Base Die MDFI T2T", 156 [5] = "Undefined", 157 [6] = "Base Die MDFI T2C", 158 [7] = "Undefined", 159 [8] = "Invalid CSC PSF Command Parity", 160 [9] = "Invalid CSC PSF Unexpected Completion", 161 [10] = "Invalid PCIe PSF Command Parity", 162 [11 ... 31] = "Undefined", 163 }; 164 static_assert(ARRAY_SIZE(pvc_master_local_nonfatal_err_reg) == XE_RAS_REG_SIZE); 165 166 #define PVC_MASTER_LOCAL_REG_INFO(hw_err) ((hw_err == HARDWARE_ERROR_FATAL) ? \ 167 pvc_master_local_fatal_err_reg : \ 168 pvc_master_local_nonfatal_err_reg) 169 170 static bool fault_inject_csc_hw_error(void) 171 { 172 return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_csc_hw_error, 1); 173 } 174 175 static void csc_hw_error_work(struct work_struct *work) 176 { 177 struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work); 178 struct xe_device *xe = tile_to_xe(tile); 179 180 xe_survivability_mode_runtime_enable(xe); 181 } 182 183 static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err) 184 { 185 const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err); 186 const char *severity_str = error_severity[severity]; 187 struct xe_device *xe = tile_to_xe(tile); 188 struct xe_mmio *mmio = &tile->mmio; 189 u32 base, err_bit, err_src; 190 unsigned long fw_err; 191 192 if (xe->info.platform != XE_BATTLEMAGE) 193 return; 194 195 base = BMG_GSC_HECI1_BASE; 196 lockdep_assert_held(&xe->irq.lock); 197 err_src = xe_mmio_read32(mmio, HEC_UNCORR_ERR_STATUS(base)); 198 if (!err_src) { 199 drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported %s HEC_ERR_STATUS register blank\n", 200 tile->id, severity_str); 201 return; 202 } 203 204 if (err_src & UNCORR_FW_REPORTED_ERR) { 205 fw_err = xe_mmio_read32(mmio, HEC_UNCORR_FW_ERR_DW0(base)); 206 for_each_set_bit(err_bit, &fw_err, HEC_UNCORR_FW_ERR_BITS) { 207 drm_err_ratelimited(&xe->drm, HW_ERR 208 "HEC FW %s %s reported, bit[%d] is set\n", 209 hec_uncorrected_fw_errors[err_bit], severity_str, 210 err_bit); 211 212 schedule_work(&tile->csc_hw_error_work); 213 } 214 } 215 216 xe_mmio_write32(mmio, HEC_UNCORR_ERR_STATUS(base), err_src); 217 } 218 219 static void log_hw_error(struct xe_tile *tile, const char *name, 220 const enum drm_xe_ras_error_severity severity) 221 { 222 const char *severity_str = error_severity[severity]; 223 struct xe_device *xe = tile_to_xe(tile); 224 225 if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE) 226 drm_warn(&xe->drm, "%s %s detected\n", name, severity_str); 227 else 228 drm_err_ratelimited(&xe->drm, "%s %s detected\n", name, severity_str); 229 } 230 231 static void log_gt_err(struct xe_tile *tile, const char *name, int i, u32 err, 232 const enum drm_xe_ras_error_severity severity) 233 { 234 const char *severity_str = error_severity[severity]; 235 struct xe_device *xe = tile_to_xe(tile); 236 237 if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE) 238 drm_warn(&xe->drm, "%s %s detected, ERROR_STAT_GT_VECTOR%d:0x%08x\n", 239 name, severity_str, i, err); 240 else 241 drm_err_ratelimited(&xe->drm, "%s %s detected, ERROR_STAT_GT_VECTOR%d:0x%08x\n", 242 name, severity_str, i, err); 243 } 244 245 static void log_soc_error(struct xe_tile *tile, const char * const *reg_info, 246 const enum drm_xe_ras_error_severity severity, u32 err_bit, u32 index) 247 { 248 const char *severity_str = error_severity[severity]; 249 struct xe_device *xe = tile_to_xe(tile); 250 struct xe_drm_ras *ras = &xe->ras; 251 struct xe_drm_ras_counter *info = ras->info[severity]; 252 const char *name; 253 254 name = reg_info[err_bit]; 255 256 if (strcmp(name, "Undefined")) { 257 if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE) 258 drm_warn(&xe->drm, "%s SOC %s detected", name, severity_str); 259 else 260 drm_err_ratelimited(&xe->drm, "%s SOC %s detected", name, severity_str); 261 atomic_inc(&info[index].counter); 262 } 263 } 264 265 static void gt_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err, 266 u32 error_id) 267 { 268 const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err); 269 struct xe_device *xe = tile_to_xe(tile); 270 struct xe_drm_ras *ras = &xe->ras; 271 struct xe_drm_ras_counter *info = ras->info[severity]; 272 struct xe_mmio *mmio = &tile->mmio; 273 unsigned long err_stat = 0; 274 int i; 275 276 if (xe->info.platform != XE_PVC) 277 return; 278 279 if (hw_err == HARDWARE_ERROR_NONFATAL) { 280 atomic_inc(&info[error_id].counter); 281 log_hw_error(tile, info[error_id].name, severity); 282 return; 283 } 284 285 for (i = 0; i < PVC_GT_VECTOR_LEN(hw_err); i++) { 286 u32 vector, val; 287 288 vector = xe_mmio_read32(mmio, ERR_STAT_GT_VECTOR_REG(hw_err, i)); 289 if (!vector) 290 continue; 291 292 switch (i) { 293 case ERR_STAT_GT_VECTOR0: 294 case ERR_STAT_GT_VECTOR1: { 295 u32 errbit; 296 297 val = hweight32(vector); 298 atomic_add(val, &info[error_id].counter); 299 log_gt_err(tile, "Subslice", i, vector, severity); 300 301 /* 302 * Error status register is only populated once per error. 303 * Read the register and clear once. 304 */ 305 if (err_stat) 306 break; 307 308 err_stat = xe_mmio_read32(mmio, ERR_STAT_GT_REG(hw_err)); 309 for_each_set_bit(errbit, &err_stat, GT_HW_ERROR_MAX_ERR_BITS) { 310 if (PVC_ERROR_MASK_SET(hw_err, errbit)) 311 atomic_inc(&info[error_id].counter); 312 } 313 if (err_stat) 314 xe_mmio_write32(mmio, ERR_STAT_GT_REG(hw_err), err_stat); 315 break; 316 } 317 case ERR_STAT_GT_VECTOR2: 318 case ERR_STAT_GT_VECTOR3: 319 val = hweight32(vector); 320 atomic_add(val, &info[error_id].counter); 321 log_gt_err(tile, "L3 BANK", i, vector, severity); 322 break; 323 case ERR_STAT_GT_VECTOR6: 324 val = hweight32(vector); 325 atomic_add(val, &info[error_id].counter); 326 log_gt_err(tile, "TLB", i, vector, severity); 327 break; 328 case ERR_STAT_GT_VECTOR7: 329 val = hweight32(vector); 330 atomic_add(val, &info[error_id].counter); 331 log_gt_err(tile, "L3 Fabric", i, vector, severity); 332 break; 333 default: 334 log_gt_err(tile, "Undefined", i, vector, severity); 335 } 336 337 xe_mmio_write32(mmio, ERR_STAT_GT_VECTOR_REG(hw_err, i), vector); 338 } 339 } 340 341 static void soc_slave_ieh_handler(struct xe_tile *tile, const enum hardware_error hw_err, u32 error_id) 342 { 343 const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err); 344 unsigned long slave_global_errstat, slave_local_errstat; 345 struct xe_mmio *mmio = &tile->mmio; 346 u32 regbit, slave; 347 348 slave = SOC_PVC_SLAVE_BASE; 349 slave_global_errstat = xe_mmio_read32(mmio, SOC_GLOBAL_ERR_STAT_REG(slave, hw_err)); 350 351 if (slave_global_errstat & SOC_IEH1_LOCAL_ERR_STATUS) { 352 slave_local_errstat = xe_mmio_read32(mmio, SOC_LOCAL_ERR_STAT_REG(slave, hw_err)); 353 354 if (hw_err == HARDWARE_ERROR_FATAL) { 355 for_each_set_bit(regbit, &slave_local_errstat, XE_RAS_REG_SIZE) 356 log_soc_error(tile, pvc_slave_local_fatal_err_reg, severity, 357 regbit, error_id); 358 } 359 360 xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(slave, hw_err), 361 slave_local_errstat); 362 } 363 364 for_each_set_bit(regbit, &slave_global_errstat, XE_RAS_REG_SIZE) 365 log_soc_error(tile, pvc_slave_global_err_reg, severity, regbit, error_id); 366 367 xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(slave, hw_err), slave_global_errstat); 368 } 369 370 static void soc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err, 371 u32 error_id) 372 { 373 const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err); 374 struct xe_device *xe = tile_to_xe(tile); 375 struct xe_mmio *mmio = &tile->mmio; 376 unsigned long master_global_errstat, master_local_errstat; 377 u32 master, slave, regbit; 378 int i; 379 380 if (xe->info.platform != XE_PVC) 381 return; 382 383 master = SOC_PVC_MASTER_BASE; 384 slave = SOC_PVC_SLAVE_BASE; 385 386 /* Mask error type in GSYSEVTCTL so that no new errors of the type will be reported */ 387 for (i = 0; i < XE_SOC_NUM_IEH; i++) 388 xe_mmio_write32(mmio, SOC_GSYSEVTCTL_REG(master, slave, i), ~REG_BIT(hw_err)); 389 390 if (hw_err == HARDWARE_ERROR_CORRECTABLE) { 391 xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(master, hw_err), REG_GENMASK(31, 0)); 392 xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(master, hw_err), REG_GENMASK(31, 0)); 393 xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(slave, hw_err), REG_GENMASK(31, 0)); 394 xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(slave, hw_err), REG_GENMASK(31, 0)); 395 goto unmask_gsysevtctl; 396 } 397 398 /* 399 * Read the master global IEH error register, if BIT(1) is set then process 400 * the slave IEH first. If BIT(0) in global error register is set then process 401 * the corresponding local error registers. 402 */ 403 master_global_errstat = xe_mmio_read32(mmio, SOC_GLOBAL_ERR_STAT_REG(master, hw_err)); 404 if (master_global_errstat & SOC_SLAVE_IEH) 405 soc_slave_ieh_handler(tile, hw_err, error_id); 406 407 if (master_global_errstat & SOC_IEH0_LOCAL_ERR_STATUS) { 408 master_local_errstat = xe_mmio_read32(mmio, SOC_LOCAL_ERR_STAT_REG(master, hw_err)); 409 410 for_each_set_bit(regbit, &master_local_errstat, XE_RAS_REG_SIZE) 411 log_soc_error(tile, PVC_MASTER_LOCAL_REG_INFO(hw_err), severity, regbit, error_id); 412 413 xe_mmio_write32(mmio, SOC_LOCAL_ERR_STAT_REG(master, hw_err), master_local_errstat); 414 } 415 416 for_each_set_bit(regbit, &master_global_errstat, XE_RAS_REG_SIZE) 417 log_soc_error(tile, pvc_master_global_err_reg, severity, regbit, error_id); 418 419 xe_mmio_write32(mmio, SOC_GLOBAL_ERR_STAT_REG(master, hw_err), master_global_errstat); 420 421 unmask_gsysevtctl: 422 for (i = 0; i < XE_SOC_NUM_IEH; i++) 423 xe_mmio_write32(mmio, SOC_GSYSEVTCTL_REG(master, slave, i), 424 (HARDWARE_ERROR_MAX << 1) + 1); 425 } 426 427 static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_error hw_err) 428 { 429 const enum drm_xe_ras_error_severity severity = hw_err_to_severity(hw_err); 430 const char *severity_str = error_severity[severity]; 431 struct xe_device *xe = tile_to_xe(tile); 432 struct xe_drm_ras *ras = &xe->ras; 433 struct xe_drm_ras_counter *info = ras->info[severity]; 434 unsigned long flags, err_src; 435 u32 err_bit; 436 437 if (!IS_DGFX(xe)) 438 return; 439 440 spin_lock_irqsave(&xe->irq.lock, flags); 441 err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err)); 442 if (!err_src) { 443 drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported %s DEV_ERR_STAT register blank!\n", 444 tile->id, severity_str); 445 goto unlock; 446 } 447 448 /* 449 * On encountering CSC firmware errors, the graphics device becomes unrecoverable 450 * so return immediately on error. The only way to recover from these errors is 451 * firmware flash. The device will enter Runtime Survivability mode when such 452 * errors are detected. 453 */ 454 if (err_src & REG_BIT(XE_CSC_ERROR)) { 455 csc_hw_error_handler(tile, hw_err); 456 goto clear_reg; 457 } 458 459 if (!info) 460 goto clear_reg; 461 462 for_each_set_bit(err_bit, &err_src, XE_RAS_REG_SIZE) { 463 const char *name; 464 u32 error_id; 465 466 error_id = err_src_to_id(err_bit); 467 if (!error_id) 468 continue; 469 470 name = info[error_id].name; 471 if (!name) 472 continue; 473 474 if (severity == DRM_XE_RAS_ERR_SEV_CORRECTABLE) { 475 drm_warn(&xe->drm, HW_ERR 476 "TILE%d reported %s %s, bit[%d] is set\n", 477 tile->id, name, severity_str, err_bit); 478 } else { 479 drm_err_ratelimited(&xe->drm, HW_ERR 480 "TILE%d reported %s %s, bit[%d] is set\n", 481 tile->id, name, severity_str, err_bit); 482 } 483 484 if (err_bit == XE_GT_ERROR) 485 gt_hw_error_handler(tile, hw_err, error_id); 486 if (err_bit == XE_SOC_ERROR) 487 soc_hw_error_handler(tile, hw_err, error_id); 488 } 489 490 clear_reg: 491 xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src); 492 unlock: 493 spin_unlock_irqrestore(&xe->irq.lock, flags); 494 } 495 496 /** 497 * xe_hw_error_irq_handler - irq handling for hw errors 498 * @tile: tile instance 499 * @master_ctl: value read from master interrupt register 500 * 501 * Xe platforms add three error bits to the master interrupt register to support error handling. 502 * These three bits are used to convey the class of error FATAL, NONFATAL, or CORRECTABLE. 503 * To process the interrupt, determine the source of error by reading the Device Error Source 504 * Register that corresponds to the class of error being serviced. 505 */ 506 void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl) 507 { 508 enum hardware_error hw_err; 509 510 if (fault_inject_csc_hw_error()) 511 schedule_work(&tile->csc_hw_error_work); 512 513 for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++) { 514 if (master_ctl & ERROR_IRQ(hw_err)) 515 hw_error_source_handler(tile, hw_err); 516 } 517 } 518 519 static int hw_error_info_init(struct xe_device *xe) 520 { 521 if (xe->info.platform != XE_PVC) 522 return 0; 523 524 return xe_drm_ras_init(xe); 525 } 526 527 /* 528 * Process hardware errors during boot 529 */ 530 static void process_hw_errors(struct xe_device *xe) 531 { 532 struct xe_tile *tile; 533 u32 master_ctl; 534 u8 id; 535 536 for_each_tile(tile, xe, id) { 537 master_ctl = xe_mmio_read32(&tile->mmio, GFX_MSTR_IRQ); 538 xe_hw_error_irq_handler(tile, master_ctl); 539 xe_mmio_write32(&tile->mmio, GFX_MSTR_IRQ, master_ctl); 540 } 541 } 542 543 /** 544 * xe_hw_error_init - Initialize hw errors 545 * @xe: xe device instance 546 * 547 * Initialize and check for errors that occurred during boot 548 * prior to driver load 549 */ 550 void xe_hw_error_init(struct xe_device *xe) 551 { 552 struct xe_tile *tile = xe_device_get_root_tile(xe); 553 int ret; 554 555 if (!IS_DGFX(xe) || IS_SRIOV_VF(xe)) 556 return; 557 558 INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work); 559 560 ret = hw_error_info_init(xe); 561 if (ret) 562 drm_err(&xe->drm, "Failed to initialize XE DRM RAS (%pe)\n", ERR_PTR(ret)); 563 564 process_hw_errors(xe); 565 } 566