1 // SPDX-License-Identifier: GPL-2.0+ 2 /* Copyright (c) 2016-2017 Hisilicon Limited. */ 3 4 #include <linux/sched/clock.h> 5 6 #include "hclge_err.h" 7 8 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { 9 { 10 .int_msk = BIT(1), 11 .msg = "imp_itcm0_ecc_mbit_err", 12 .reset_level = HNAE3_NONE_RESET 13 }, { 14 .int_msk = BIT(3), 15 .msg = "imp_itcm1_ecc_mbit_err", 16 .reset_level = HNAE3_NONE_RESET 17 }, { 18 .int_msk = BIT(5), 19 .msg = "imp_itcm2_ecc_mbit_err", 20 .reset_level = HNAE3_NONE_RESET 21 }, { 22 .int_msk = BIT(7), 23 .msg = "imp_itcm3_ecc_mbit_err", 24 .reset_level = HNAE3_NONE_RESET 25 }, { 26 .int_msk = BIT(9), 27 .msg = "imp_dtcm0_mem0_ecc_mbit_err", 28 .reset_level = HNAE3_NONE_RESET 29 }, { 30 .int_msk = BIT(11), 31 .msg = "imp_dtcm0_mem1_ecc_mbit_err", 32 .reset_level = HNAE3_NONE_RESET 33 }, { 34 .int_msk = BIT(13), 35 .msg = "imp_dtcm1_mem0_ecc_mbit_err", 36 .reset_level = HNAE3_NONE_RESET 37 }, { 38 .int_msk = BIT(15), 39 .msg = "imp_dtcm1_mem1_ecc_mbit_err", 40 .reset_level = HNAE3_NONE_RESET 41 }, { 42 .int_msk = BIT(17), 43 .msg = "imp_itcm4_ecc_mbit_err", 44 .reset_level = HNAE3_NONE_RESET 45 }, { 46 /* sentinel */ 47 } 48 }; 49 50 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { 51 { 52 .int_msk = BIT(1), 53 .msg = "cmdq_nic_rx_depth_ecc_mbit_err", 54 .reset_level = HNAE3_NONE_RESET 55 }, { 56 .int_msk = BIT(3), 57 .msg = "cmdq_nic_tx_depth_ecc_mbit_err", 58 .reset_level = HNAE3_NONE_RESET 59 }, { 60 .int_msk = BIT(5), 61 .msg = "cmdq_nic_rx_tail_ecc_mbit_err", 62 .reset_level = HNAE3_NONE_RESET 63 }, { 64 .int_msk = BIT(7), 65 .msg = "cmdq_nic_tx_tail_ecc_mbit_err", 66 .reset_level = HNAE3_NONE_RESET 67 }, { 68 .int_msk = BIT(9), 69 .msg = "cmdq_nic_rx_head_ecc_mbit_err", 70 .reset_level = HNAE3_NONE_RESET 71 }, { 72 .int_msk = BIT(11), 73 .msg = "cmdq_nic_tx_head_ecc_mbit_err", 74 .reset_level = HNAE3_NONE_RESET 75 }, { 76 .int_msk = BIT(13), 77 .msg = "cmdq_nic_rx_addr_ecc_mbit_err", 78 .reset_level = HNAE3_NONE_RESET 79 }, { 80 .int_msk = BIT(15), 81 .msg = "cmdq_nic_tx_addr_ecc_mbit_err", 82 .reset_level = HNAE3_NONE_RESET 83 }, { 84 .int_msk = BIT(17), 85 .msg = "cmdq_rocee_rx_depth_ecc_mbit_err", 86 .reset_level = HNAE3_NONE_RESET 87 }, { 88 .int_msk = BIT(19), 89 .msg = "cmdq_rocee_tx_depth_ecc_mbit_err", 90 .reset_level = HNAE3_NONE_RESET 91 }, { 92 .int_msk = BIT(21), 93 .msg = "cmdq_rocee_rx_tail_ecc_mbit_err", 94 .reset_level = HNAE3_NONE_RESET 95 }, { 96 .int_msk = BIT(23), 97 .msg = "cmdq_rocee_tx_tail_ecc_mbit_err", 98 .reset_level = HNAE3_NONE_RESET 99 }, { 100 .int_msk = BIT(25), 101 .msg = "cmdq_rocee_rx_head_ecc_mbit_err", 102 .reset_level = HNAE3_NONE_RESET 103 }, { 104 .int_msk = BIT(27), 105 .msg = "cmdq_rocee_tx_head_ecc_mbit_err", 106 .reset_level = HNAE3_NONE_RESET 107 }, { 108 .int_msk = BIT(29), 109 .msg = "cmdq_rocee_rx_addr_ecc_mbit_err", 110 .reset_level = HNAE3_NONE_RESET 111 }, { 112 .int_msk = BIT(31), 113 .msg = "cmdq_rocee_tx_addr_ecc_mbit_err", 114 .reset_level = HNAE3_NONE_RESET 115 }, { 116 /* sentinel */ 117 } 118 }; 119 120 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { 121 { 122 .int_msk = BIT(6), 123 .msg = "tqp_int_cfg_even_ecc_mbit_err", 124 .reset_level = HNAE3_NONE_RESET 125 }, { 126 .int_msk = BIT(7), 127 .msg = "tqp_int_cfg_odd_ecc_mbit_err", 128 .reset_level = HNAE3_NONE_RESET 129 }, { 130 .int_msk = BIT(8), 131 .msg = "tqp_int_ctrl_even_ecc_mbit_err", 132 .reset_level = HNAE3_NONE_RESET 133 }, { 134 .int_msk = BIT(9), 135 .msg = "tqp_int_ctrl_odd_ecc_mbit_err", 136 .reset_level = HNAE3_NONE_RESET 137 }, { 138 .int_msk = BIT(10), 139 .msg = "tx_que_scan_int_ecc_mbit_err", 140 .reset_level = HNAE3_NONE_RESET 141 }, { 142 .int_msk = BIT(11), 143 .msg = "rx_que_scan_int_ecc_mbit_err", 144 .reset_level = HNAE3_NONE_RESET 145 }, { 146 /* sentinel */ 147 } 148 }; 149 150 static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = { 151 { 152 .int_msk = BIT(1), 153 .msg = "msix_nic_ecc_mbit_err", 154 .reset_level = HNAE3_NONE_RESET 155 }, { 156 .int_msk = BIT(3), 157 .msg = "msix_rocee_ecc_mbit_err", 158 .reset_level = HNAE3_NONE_RESET 159 }, { 160 /* sentinel */ 161 } 162 }; 163 164 static const struct hclge_hw_error hclge_igu_int[] = { 165 { 166 .int_msk = BIT(0), 167 .msg = "igu_rx_buf0_ecc_mbit_err", 168 .reset_level = HNAE3_GLOBAL_RESET 169 }, { 170 .int_msk = BIT(2), 171 .msg = "igu_rx_buf1_ecc_mbit_err", 172 .reset_level = HNAE3_GLOBAL_RESET 173 }, { 174 /* sentinel */ 175 } 176 }; 177 178 static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { 179 { 180 .int_msk = BIT(0), 181 .msg = "rx_buf_overflow", 182 .reset_level = HNAE3_GLOBAL_RESET 183 }, { 184 .int_msk = BIT(1), 185 .msg = "rx_stp_fifo_overflow", 186 .reset_level = HNAE3_GLOBAL_RESET 187 }, { 188 .int_msk = BIT(2), 189 .msg = "rx_stp_fifo_underflow", 190 .reset_level = HNAE3_GLOBAL_RESET 191 }, { 192 .int_msk = BIT(3), 193 .msg = "tx_buf_overflow", 194 .reset_level = HNAE3_GLOBAL_RESET 195 }, { 196 .int_msk = BIT(4), 197 .msg = "tx_buf_underrun", 198 .reset_level = HNAE3_GLOBAL_RESET 199 }, { 200 .int_msk = BIT(5), 201 .msg = "rx_stp_buf_overflow", 202 .reset_level = HNAE3_GLOBAL_RESET 203 }, { 204 /* sentinel */ 205 } 206 }; 207 208 static const struct hclge_hw_error hclge_ncsi_err_int[] = { 209 { 210 .int_msk = BIT(1), 211 .msg = "ncsi_tx_ecc_mbit_err", 212 .reset_level = HNAE3_NONE_RESET 213 }, { 214 /* sentinel */ 215 } 216 }; 217 218 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = { 219 { 220 .int_msk = BIT(0), 221 .msg = "vf_vlan_ad_mem_ecc_mbit_err", 222 .reset_level = HNAE3_GLOBAL_RESET 223 }, { 224 .int_msk = BIT(1), 225 .msg = "umv_mcast_group_mem_ecc_mbit_err", 226 .reset_level = HNAE3_GLOBAL_RESET 227 }, { 228 .int_msk = BIT(2), 229 .msg = "umv_key_mem0_ecc_mbit_err", 230 .reset_level = HNAE3_GLOBAL_RESET 231 }, { 232 .int_msk = BIT(3), 233 .msg = "umv_key_mem1_ecc_mbit_err", 234 .reset_level = HNAE3_GLOBAL_RESET 235 }, { 236 .int_msk = BIT(4), 237 .msg = "umv_key_mem2_ecc_mbit_err", 238 .reset_level = HNAE3_GLOBAL_RESET 239 }, { 240 .int_msk = BIT(5), 241 .msg = "umv_key_mem3_ecc_mbit_err", 242 .reset_level = HNAE3_GLOBAL_RESET 243 }, { 244 .int_msk = BIT(6), 245 .msg = "umv_ad_mem_ecc_mbit_err", 246 .reset_level = HNAE3_GLOBAL_RESET 247 }, { 248 .int_msk = BIT(7), 249 .msg = "rss_tc_mode_mem_ecc_mbit_err", 250 .reset_level = HNAE3_GLOBAL_RESET 251 }, { 252 .int_msk = BIT(8), 253 .msg = "rss_idt_mem0_ecc_mbit_err", 254 .reset_level = HNAE3_GLOBAL_RESET 255 }, { 256 .int_msk = BIT(9), 257 .msg = "rss_idt_mem1_ecc_mbit_err", 258 .reset_level = HNAE3_GLOBAL_RESET 259 }, { 260 .int_msk = BIT(10), 261 .msg = "rss_idt_mem2_ecc_mbit_err", 262 .reset_level = HNAE3_GLOBAL_RESET 263 }, { 264 .int_msk = BIT(11), 265 .msg = "rss_idt_mem3_ecc_mbit_err", 266 .reset_level = HNAE3_GLOBAL_RESET 267 }, { 268 .int_msk = BIT(12), 269 .msg = "rss_idt_mem4_ecc_mbit_err", 270 .reset_level = HNAE3_GLOBAL_RESET 271 }, { 272 .int_msk = BIT(13), 273 .msg = "rss_idt_mem5_ecc_mbit_err", 274 .reset_level = HNAE3_GLOBAL_RESET 275 }, { 276 .int_msk = BIT(14), 277 .msg = "rss_idt_mem6_ecc_mbit_err", 278 .reset_level = HNAE3_GLOBAL_RESET 279 }, { 280 .int_msk = BIT(15), 281 .msg = "rss_idt_mem7_ecc_mbit_err", 282 .reset_level = HNAE3_GLOBAL_RESET 283 }, { 284 .int_msk = BIT(16), 285 .msg = "rss_idt_mem8_ecc_mbit_err", 286 .reset_level = HNAE3_GLOBAL_RESET 287 }, { 288 .int_msk = BIT(17), 289 .msg = "rss_idt_mem9_ecc_mbit_err", 290 .reset_level = HNAE3_GLOBAL_RESET 291 }, { 292 .int_msk = BIT(18), 293 .msg = "rss_idt_mem10_ecc_mbit_err", 294 .reset_level = HNAE3_GLOBAL_RESET 295 }, { 296 .int_msk = BIT(19), 297 .msg = "rss_idt_mem11_ecc_mbit_err", 298 .reset_level = HNAE3_GLOBAL_RESET 299 }, { 300 .int_msk = BIT(20), 301 .msg = "rss_idt_mem12_ecc_mbit_err", 302 .reset_level = HNAE3_GLOBAL_RESET 303 }, { 304 .int_msk = BIT(21), 305 .msg = "rss_idt_mem13_ecc_mbit_err", 306 .reset_level = HNAE3_GLOBAL_RESET 307 }, { 308 .int_msk = BIT(22), 309 .msg = "rss_idt_mem14_ecc_mbit_err", 310 .reset_level = HNAE3_GLOBAL_RESET 311 }, { 312 .int_msk = BIT(23), 313 .msg = "rss_idt_mem15_ecc_mbit_err", 314 .reset_level = HNAE3_GLOBAL_RESET 315 }, { 316 .int_msk = BIT(24), 317 .msg = "port_vlan_mem_ecc_mbit_err", 318 .reset_level = HNAE3_GLOBAL_RESET 319 }, { 320 .int_msk = BIT(25), 321 .msg = "mcast_linear_table_mem_ecc_mbit_err", 322 .reset_level = HNAE3_GLOBAL_RESET 323 }, { 324 .int_msk = BIT(26), 325 .msg = "mcast_result_mem_ecc_mbit_err", 326 .reset_level = HNAE3_GLOBAL_RESET 327 }, { 328 .int_msk = BIT(27), 329 .msg = "flow_director_ad_mem0_ecc_mbit_err", 330 .reset_level = HNAE3_GLOBAL_RESET 331 }, { 332 .int_msk = BIT(28), 333 .msg = "flow_director_ad_mem1_ecc_mbit_err", 334 .reset_level = HNAE3_GLOBAL_RESET 335 }, { 336 .int_msk = BIT(29), 337 .msg = "rx_vlan_tag_memory_ecc_mbit_err", 338 .reset_level = HNAE3_GLOBAL_RESET 339 }, { 340 .int_msk = BIT(30), 341 .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err", 342 .reset_level = HNAE3_GLOBAL_RESET 343 }, { 344 /* sentinel */ 345 } 346 }; 347 348 static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = { 349 { 350 .int_msk = BIT(0), 351 .msg = "tx_vlan_tag_err", 352 .reset_level = HNAE3_NONE_RESET 353 }, { 354 .int_msk = BIT(1), 355 .msg = "rss_list_tc_unassigned_queue_err", 356 .reset_level = HNAE3_NONE_RESET 357 }, { 358 /* sentinel */ 359 } 360 }; 361 362 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = { 363 { 364 .int_msk = BIT(0), 365 .msg = "hfs_fifo_mem_ecc_mbit_err", 366 .reset_level = HNAE3_GLOBAL_RESET 367 }, { 368 .int_msk = BIT(1), 369 .msg = "rslt_descr_fifo_mem_ecc_mbit_err", 370 .reset_level = HNAE3_GLOBAL_RESET 371 }, { 372 .int_msk = BIT(2), 373 .msg = "tx_vlan_tag_mem_ecc_mbit_err", 374 .reset_level = HNAE3_GLOBAL_RESET 375 }, { 376 .int_msk = BIT(3), 377 .msg = "FD_CN0_memory_ecc_mbit_err", 378 .reset_level = HNAE3_GLOBAL_RESET 379 }, { 380 .int_msk = BIT(4), 381 .msg = "FD_CN1_memory_ecc_mbit_err", 382 .reset_level = HNAE3_GLOBAL_RESET 383 }, { 384 .int_msk = BIT(5), 385 .msg = "GRO_AD_memory_ecc_mbit_err", 386 .reset_level = HNAE3_GLOBAL_RESET 387 }, { 388 /* sentinel */ 389 } 390 }; 391 392 static const struct hclge_hw_error hclge_tm_sch_rint[] = { 393 { 394 .int_msk = BIT(1), 395 .msg = "tm_sch_ecc_mbit_err", 396 .reset_level = HNAE3_GLOBAL_RESET 397 }, { 398 .int_msk = BIT(2), 399 .msg = "tm_sch_port_shap_sub_fifo_wr_err", 400 .reset_level = HNAE3_GLOBAL_RESET 401 }, { 402 .int_msk = BIT(3), 403 .msg = "tm_sch_port_shap_sub_fifo_rd_err", 404 .reset_level = HNAE3_GLOBAL_RESET 405 }, { 406 .int_msk = BIT(4), 407 .msg = "tm_sch_pg_pshap_sub_fifo_wr_err", 408 .reset_level = HNAE3_GLOBAL_RESET 409 }, { 410 .int_msk = BIT(5), 411 .msg = "tm_sch_pg_pshap_sub_fifo_rd_err", 412 .reset_level = HNAE3_GLOBAL_RESET 413 }, { 414 .int_msk = BIT(6), 415 .msg = "tm_sch_pg_cshap_sub_fifo_wr_err", 416 .reset_level = HNAE3_GLOBAL_RESET 417 }, { 418 .int_msk = BIT(7), 419 .msg = "tm_sch_pg_cshap_sub_fifo_rd_err", 420 .reset_level = HNAE3_GLOBAL_RESET 421 }, { 422 .int_msk = BIT(8), 423 .msg = "tm_sch_pri_pshap_sub_fifo_wr_err", 424 .reset_level = HNAE3_GLOBAL_RESET 425 }, { 426 .int_msk = BIT(9), 427 .msg = "tm_sch_pri_pshap_sub_fifo_rd_err", 428 .reset_level = HNAE3_GLOBAL_RESET 429 }, { 430 .int_msk = BIT(10), 431 .msg = "tm_sch_pri_cshap_sub_fifo_wr_err", 432 .reset_level = HNAE3_GLOBAL_RESET 433 }, { 434 .int_msk = BIT(11), 435 .msg = "tm_sch_pri_cshap_sub_fifo_rd_err", 436 .reset_level = HNAE3_GLOBAL_RESET 437 }, { 438 .int_msk = BIT(12), 439 .msg = "tm_sch_port_shap_offset_fifo_wr_err", 440 .reset_level = HNAE3_GLOBAL_RESET 441 }, { 442 .int_msk = BIT(13), 443 .msg = "tm_sch_port_shap_offset_fifo_rd_err", 444 .reset_level = HNAE3_GLOBAL_RESET 445 }, { 446 .int_msk = BIT(14), 447 .msg = "tm_sch_pg_pshap_offset_fifo_wr_err", 448 .reset_level = HNAE3_GLOBAL_RESET 449 }, { 450 .int_msk = BIT(15), 451 .msg = "tm_sch_pg_pshap_offset_fifo_rd_err", 452 .reset_level = HNAE3_GLOBAL_RESET 453 }, { 454 .int_msk = BIT(16), 455 .msg = "tm_sch_pg_cshap_offset_fifo_wr_err", 456 .reset_level = HNAE3_GLOBAL_RESET 457 }, { 458 .int_msk = BIT(17), 459 .msg = "tm_sch_pg_cshap_offset_fifo_rd_err", 460 .reset_level = HNAE3_GLOBAL_RESET 461 }, { 462 .int_msk = BIT(18), 463 .msg = "tm_sch_pri_pshap_offset_fifo_wr_err", 464 .reset_level = HNAE3_GLOBAL_RESET 465 }, { 466 .int_msk = BIT(19), 467 .msg = "tm_sch_pri_pshap_offset_fifo_rd_err", 468 .reset_level = HNAE3_GLOBAL_RESET 469 }, { 470 .int_msk = BIT(20), 471 .msg = "tm_sch_pri_cshap_offset_fifo_wr_err", 472 .reset_level = HNAE3_GLOBAL_RESET 473 }, { 474 .int_msk = BIT(21), 475 .msg = "tm_sch_pri_cshap_offset_fifo_rd_err", 476 .reset_level = HNAE3_GLOBAL_RESET 477 }, { 478 .int_msk = BIT(22), 479 .msg = "tm_sch_rq_fifo_wr_err", 480 .reset_level = HNAE3_GLOBAL_RESET 481 }, { 482 .int_msk = BIT(23), 483 .msg = "tm_sch_rq_fifo_rd_err", 484 .reset_level = HNAE3_GLOBAL_RESET 485 }, { 486 .int_msk = BIT(24), 487 .msg = "tm_sch_nq_fifo_wr_err", 488 .reset_level = HNAE3_GLOBAL_RESET 489 }, { 490 .int_msk = BIT(25), 491 .msg = "tm_sch_nq_fifo_rd_err", 492 .reset_level = HNAE3_GLOBAL_RESET 493 }, { 494 .int_msk = BIT(26), 495 .msg = "tm_sch_roce_up_fifo_wr_err", 496 .reset_level = HNAE3_GLOBAL_RESET 497 }, { 498 .int_msk = BIT(27), 499 .msg = "tm_sch_roce_up_fifo_rd_err", 500 .reset_level = HNAE3_GLOBAL_RESET 501 }, { 502 .int_msk = BIT(28), 503 .msg = "tm_sch_rcb_byte_fifo_wr_err", 504 .reset_level = HNAE3_GLOBAL_RESET 505 }, { 506 .int_msk = BIT(29), 507 .msg = "tm_sch_rcb_byte_fifo_rd_err", 508 .reset_level = HNAE3_GLOBAL_RESET 509 }, { 510 .int_msk = BIT(30), 511 .msg = "tm_sch_ssu_byte_fifo_wr_err", 512 .reset_level = HNAE3_GLOBAL_RESET 513 }, { 514 .int_msk = BIT(31), 515 .msg = "tm_sch_ssu_byte_fifo_rd_err", 516 .reset_level = HNAE3_GLOBAL_RESET 517 }, { 518 /* sentinel */ 519 } 520 }; 521 522 static const struct hclge_hw_error hclge_qcn_fifo_rint[] = { 523 { 524 .int_msk = BIT(0), 525 .msg = "qcn_shap_gp0_sch_fifo_rd_err", 526 .reset_level = HNAE3_GLOBAL_RESET 527 }, { 528 .int_msk = BIT(1), 529 .msg = "qcn_shap_gp0_sch_fifo_wr_err", 530 .reset_level = HNAE3_GLOBAL_RESET 531 }, { 532 .int_msk = BIT(2), 533 .msg = "qcn_shap_gp1_sch_fifo_rd_err", 534 .reset_level = HNAE3_GLOBAL_RESET 535 }, { 536 .int_msk = BIT(3), 537 .msg = "qcn_shap_gp1_sch_fifo_wr_err", 538 .reset_level = HNAE3_GLOBAL_RESET 539 }, { 540 .int_msk = BIT(4), 541 .msg = "qcn_shap_gp2_sch_fifo_rd_err", 542 .reset_level = HNAE3_GLOBAL_RESET 543 }, { 544 .int_msk = BIT(5), 545 .msg = "qcn_shap_gp2_sch_fifo_wr_err", 546 .reset_level = HNAE3_GLOBAL_RESET 547 }, { 548 .int_msk = BIT(6), 549 .msg = "qcn_shap_gp3_sch_fifo_rd_err", 550 .reset_level = HNAE3_GLOBAL_RESET 551 }, { 552 .int_msk = BIT(7), 553 .msg = "qcn_shap_gp3_sch_fifo_wr_err", 554 .reset_level = HNAE3_GLOBAL_RESET 555 }, { 556 .int_msk = BIT(8), 557 .msg = "qcn_shap_gp0_offset_fifo_rd_err", 558 .reset_level = HNAE3_GLOBAL_RESET 559 }, { 560 .int_msk = BIT(9), 561 .msg = "qcn_shap_gp0_offset_fifo_wr_err", 562 .reset_level = HNAE3_GLOBAL_RESET 563 }, { 564 .int_msk = BIT(10), 565 .msg = "qcn_shap_gp1_offset_fifo_rd_err", 566 .reset_level = HNAE3_GLOBAL_RESET 567 }, { 568 .int_msk = BIT(11), 569 .msg = "qcn_shap_gp1_offset_fifo_wr_err", 570 .reset_level = HNAE3_GLOBAL_RESET 571 }, { 572 .int_msk = BIT(12), 573 .msg = "qcn_shap_gp2_offset_fifo_rd_err", 574 .reset_level = HNAE3_GLOBAL_RESET 575 }, { 576 .int_msk = BIT(13), 577 .msg = "qcn_shap_gp2_offset_fifo_wr_err", 578 .reset_level = HNAE3_GLOBAL_RESET 579 }, { 580 .int_msk = BIT(14), 581 .msg = "qcn_shap_gp3_offset_fifo_rd_err", 582 .reset_level = HNAE3_GLOBAL_RESET 583 }, { 584 .int_msk = BIT(15), 585 .msg = "qcn_shap_gp3_offset_fifo_wr_err", 586 .reset_level = HNAE3_GLOBAL_RESET 587 }, { 588 .int_msk = BIT(16), 589 .msg = "qcn_byte_info_fifo_rd_err", 590 .reset_level = HNAE3_GLOBAL_RESET 591 }, { 592 .int_msk = BIT(17), 593 .msg = "qcn_byte_info_fifo_wr_err", 594 .reset_level = HNAE3_GLOBAL_RESET 595 }, { 596 /* sentinel */ 597 } 598 }; 599 600 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = { 601 { 602 .int_msk = BIT(1), 603 .msg = "qcn_byte_mem_ecc_mbit_err", 604 .reset_level = HNAE3_GLOBAL_RESET 605 }, { 606 .int_msk = BIT(3), 607 .msg = "qcn_time_mem_ecc_mbit_err", 608 .reset_level = HNAE3_GLOBAL_RESET 609 }, { 610 .int_msk = BIT(5), 611 .msg = "qcn_fb_mem_ecc_mbit_err", 612 .reset_level = HNAE3_GLOBAL_RESET 613 }, { 614 .int_msk = BIT(7), 615 .msg = "qcn_link_mem_ecc_mbit_err", 616 .reset_level = HNAE3_GLOBAL_RESET 617 }, { 618 .int_msk = BIT(9), 619 .msg = "qcn_rate_mem_ecc_mbit_err", 620 .reset_level = HNAE3_GLOBAL_RESET 621 }, { 622 .int_msk = BIT(11), 623 .msg = "qcn_tmplt_mem_ecc_mbit_err", 624 .reset_level = HNAE3_GLOBAL_RESET 625 }, { 626 .int_msk = BIT(13), 627 .msg = "qcn_shap_cfg_mem_ecc_mbit_err", 628 .reset_level = HNAE3_GLOBAL_RESET 629 }, { 630 .int_msk = BIT(15), 631 .msg = "qcn_gp0_barrel_mem_ecc_mbit_err", 632 .reset_level = HNAE3_GLOBAL_RESET 633 }, { 634 .int_msk = BIT(17), 635 .msg = "qcn_gp1_barrel_mem_ecc_mbit_err", 636 .reset_level = HNAE3_GLOBAL_RESET 637 }, { 638 .int_msk = BIT(19), 639 .msg = "qcn_gp2_barrel_mem_ecc_mbit_err", 640 .reset_level = HNAE3_GLOBAL_RESET 641 }, { 642 .int_msk = BIT(21), 643 .msg = "qcn_gp3_barral_mem_ecc_mbit_err", 644 .reset_level = HNAE3_GLOBAL_RESET 645 }, { 646 /* sentinel */ 647 } 648 }; 649 650 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { 651 { 652 .int_msk = BIT(0), 653 .msg = "egu_cge_afifo_ecc_1bit_err", 654 .reset_level = HNAE3_NONE_RESET 655 }, { 656 .int_msk = BIT(1), 657 .msg = "egu_cge_afifo_ecc_mbit_err", 658 .reset_level = HNAE3_GLOBAL_RESET 659 }, { 660 .int_msk = BIT(2), 661 .msg = "egu_lge_afifo_ecc_1bit_err", 662 .reset_level = HNAE3_NONE_RESET 663 }, { 664 .int_msk = BIT(3), 665 .msg = "egu_lge_afifo_ecc_mbit_err", 666 .reset_level = HNAE3_GLOBAL_RESET 667 }, { 668 .int_msk = BIT(4), 669 .msg = "cge_igu_afifo_ecc_1bit_err", 670 .reset_level = HNAE3_NONE_RESET 671 }, { 672 .int_msk = BIT(5), 673 .msg = "cge_igu_afifo_ecc_mbit_err", 674 .reset_level = HNAE3_GLOBAL_RESET 675 }, { 676 .int_msk = BIT(6), 677 .msg = "lge_igu_afifo_ecc_1bit_err", 678 .reset_level = HNAE3_NONE_RESET 679 }, { 680 .int_msk = BIT(7), 681 .msg = "lge_igu_afifo_ecc_mbit_err", 682 .reset_level = HNAE3_GLOBAL_RESET 683 }, { 684 .int_msk = BIT(8), 685 .msg = "cge_igu_afifo_overflow_err", 686 .reset_level = HNAE3_GLOBAL_RESET 687 }, { 688 .int_msk = BIT(9), 689 .msg = "lge_igu_afifo_overflow_err", 690 .reset_level = HNAE3_GLOBAL_RESET 691 }, { 692 .int_msk = BIT(10), 693 .msg = "egu_cge_afifo_underrun_err", 694 .reset_level = HNAE3_GLOBAL_RESET 695 }, { 696 .int_msk = BIT(11), 697 .msg = "egu_lge_afifo_underrun_err", 698 .reset_level = HNAE3_GLOBAL_RESET 699 }, { 700 .int_msk = BIT(12), 701 .msg = "egu_ge_afifo_underrun_err", 702 .reset_level = HNAE3_GLOBAL_RESET 703 }, { 704 .int_msk = BIT(13), 705 .msg = "ge_igu_afifo_overflow_err", 706 .reset_level = HNAE3_GLOBAL_RESET 707 }, { 708 /* sentinel */ 709 } 710 }; 711 712 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { 713 { 714 .int_msk = BIT(13), 715 .msg = "rpu_rx_pkt_bit32_ecc_mbit_err", 716 .reset_level = HNAE3_GLOBAL_RESET 717 }, { 718 .int_msk = BIT(14), 719 .msg = "rpu_rx_pkt_bit33_ecc_mbit_err", 720 .reset_level = HNAE3_GLOBAL_RESET 721 }, { 722 .int_msk = BIT(15), 723 .msg = "rpu_rx_pkt_bit34_ecc_mbit_err", 724 .reset_level = HNAE3_GLOBAL_RESET 725 }, { 726 .int_msk = BIT(16), 727 .msg = "rpu_rx_pkt_bit35_ecc_mbit_err", 728 .reset_level = HNAE3_GLOBAL_RESET 729 }, { 730 .int_msk = BIT(17), 731 .msg = "rcb_tx_ring_ecc_mbit_err", 732 .reset_level = HNAE3_GLOBAL_RESET 733 }, { 734 .int_msk = BIT(18), 735 .msg = "rcb_rx_ring_ecc_mbit_err", 736 .reset_level = HNAE3_GLOBAL_RESET 737 }, { 738 .int_msk = BIT(19), 739 .msg = "rcb_tx_fbd_ecc_mbit_err", 740 .reset_level = HNAE3_GLOBAL_RESET 741 }, { 742 .int_msk = BIT(20), 743 .msg = "rcb_rx_ebd_ecc_mbit_err", 744 .reset_level = HNAE3_GLOBAL_RESET 745 }, { 746 .int_msk = BIT(21), 747 .msg = "rcb_tso_info_ecc_mbit_err", 748 .reset_level = HNAE3_GLOBAL_RESET 749 }, { 750 .int_msk = BIT(22), 751 .msg = "rcb_tx_int_info_ecc_mbit_err", 752 .reset_level = HNAE3_GLOBAL_RESET 753 }, { 754 .int_msk = BIT(23), 755 .msg = "rcb_rx_int_info_ecc_mbit_err", 756 .reset_level = HNAE3_GLOBAL_RESET 757 }, { 758 .int_msk = BIT(24), 759 .msg = "tpu_tx_pkt_0_ecc_mbit_err", 760 .reset_level = HNAE3_GLOBAL_RESET 761 }, { 762 .int_msk = BIT(25), 763 .msg = "tpu_tx_pkt_1_ecc_mbit_err", 764 .reset_level = HNAE3_GLOBAL_RESET 765 }, { 766 .int_msk = BIT(26), 767 .msg = "rd_bus_err", 768 .reset_level = HNAE3_GLOBAL_RESET 769 }, { 770 .int_msk = BIT(27), 771 .msg = "wr_bus_err", 772 .reset_level = HNAE3_GLOBAL_RESET 773 }, { 774 .int_msk = BIT(28), 775 .msg = "reg_search_miss", 776 .reset_level = HNAE3_GLOBAL_RESET 777 }, { 778 .int_msk = BIT(29), 779 .msg = "rx_q_search_miss", 780 .reset_level = HNAE3_NONE_RESET 781 }, { 782 .int_msk = BIT(30), 783 .msg = "ooo_ecc_err_detect", 784 .reset_level = HNAE3_NONE_RESET 785 }, { 786 .int_msk = BIT(31), 787 .msg = "ooo_ecc_err_multpl", 788 .reset_level = HNAE3_GLOBAL_RESET 789 }, { 790 /* sentinel */ 791 } 792 }; 793 794 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { 795 { 796 .int_msk = BIT(4), 797 .msg = "gro_bd_ecc_mbit_err", 798 .reset_level = HNAE3_GLOBAL_RESET 799 }, { 800 .int_msk = BIT(5), 801 .msg = "gro_context_ecc_mbit_err", 802 .reset_level = HNAE3_GLOBAL_RESET 803 }, { 804 .int_msk = BIT(6), 805 .msg = "rx_stash_cfg_ecc_mbit_err", 806 .reset_level = HNAE3_GLOBAL_RESET 807 }, { 808 .int_msk = BIT(7), 809 .msg = "axi_rd_fbd_ecc_mbit_err", 810 .reset_level = HNAE3_GLOBAL_RESET 811 }, { 812 /* sentinel */ 813 } 814 }; 815 816 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = { 817 { 818 .int_msk = BIT(0), 819 .msg = "over_8bd_no_fe", 820 .reset_level = HNAE3_FUNC_RESET 821 }, { 822 .int_msk = BIT(1), 823 .msg = "tso_mss_cmp_min_err", 824 .reset_level = HNAE3_NONE_RESET 825 }, { 826 .int_msk = BIT(2), 827 .msg = "tso_mss_cmp_max_err", 828 .reset_level = HNAE3_NONE_RESET 829 }, { 830 .int_msk = BIT(3), 831 .msg = "tx_rd_fbd_poison", 832 .reset_level = HNAE3_FUNC_RESET 833 }, { 834 .int_msk = BIT(4), 835 .msg = "rx_rd_ebd_poison", 836 .reset_level = HNAE3_FUNC_RESET 837 }, { 838 .int_msk = BIT(5), 839 .msg = "buf_wait_timeout", 840 .reset_level = HNAE3_NONE_RESET 841 }, { 842 /* sentinel */ 843 } 844 }; 845 846 static const struct hclge_hw_error hclge_ssu_com_err_int[] = { 847 { 848 .int_msk = BIT(0), 849 .msg = "buf_sum_err", 850 .reset_level = HNAE3_NONE_RESET 851 }, { 852 .int_msk = BIT(1), 853 .msg = "ppp_mb_num_err", 854 .reset_level = HNAE3_NONE_RESET 855 }, { 856 .int_msk = BIT(2), 857 .msg = "ppp_mbid_err", 858 .reset_level = HNAE3_GLOBAL_RESET 859 }, { 860 .int_msk = BIT(3), 861 .msg = "ppp_rlt_mac_err", 862 .reset_level = HNAE3_GLOBAL_RESET 863 }, { 864 .int_msk = BIT(4), 865 .msg = "ppp_rlt_host_err", 866 .reset_level = HNAE3_GLOBAL_RESET 867 }, { 868 .int_msk = BIT(5), 869 .msg = "cks_edit_position_err", 870 .reset_level = HNAE3_GLOBAL_RESET 871 }, { 872 .int_msk = BIT(6), 873 .msg = "cks_edit_condition_err", 874 .reset_level = HNAE3_GLOBAL_RESET 875 }, { 876 .int_msk = BIT(7), 877 .msg = "vlan_edit_condition_err", 878 .reset_level = HNAE3_GLOBAL_RESET 879 }, { 880 .int_msk = BIT(8), 881 .msg = "vlan_num_ot_err", 882 .reset_level = HNAE3_GLOBAL_RESET 883 }, { 884 .int_msk = BIT(9), 885 .msg = "vlan_num_in_err", 886 .reset_level = HNAE3_GLOBAL_RESET 887 }, { 888 /* sentinel */ 889 } 890 }; 891 892 #define HCLGE_SSU_MEM_ECC_ERR(x) \ 893 { \ 894 .int_msk = BIT(x), \ 895 .msg = "ssu_mem" #x "_ecc_mbit_err", \ 896 .reset_level = HNAE3_GLOBAL_RESET \ 897 } 898 899 static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = { 900 HCLGE_SSU_MEM_ECC_ERR(0), 901 HCLGE_SSU_MEM_ECC_ERR(1), 902 HCLGE_SSU_MEM_ECC_ERR(2), 903 HCLGE_SSU_MEM_ECC_ERR(3), 904 HCLGE_SSU_MEM_ECC_ERR(4), 905 HCLGE_SSU_MEM_ECC_ERR(5), 906 HCLGE_SSU_MEM_ECC_ERR(6), 907 HCLGE_SSU_MEM_ECC_ERR(7), 908 HCLGE_SSU_MEM_ECC_ERR(8), 909 HCLGE_SSU_MEM_ECC_ERR(9), 910 HCLGE_SSU_MEM_ECC_ERR(10), 911 HCLGE_SSU_MEM_ECC_ERR(11), 912 HCLGE_SSU_MEM_ECC_ERR(12), 913 HCLGE_SSU_MEM_ECC_ERR(13), 914 HCLGE_SSU_MEM_ECC_ERR(14), 915 HCLGE_SSU_MEM_ECC_ERR(15), 916 HCLGE_SSU_MEM_ECC_ERR(16), 917 HCLGE_SSU_MEM_ECC_ERR(17), 918 HCLGE_SSU_MEM_ECC_ERR(18), 919 HCLGE_SSU_MEM_ECC_ERR(19), 920 HCLGE_SSU_MEM_ECC_ERR(20), 921 HCLGE_SSU_MEM_ECC_ERR(21), 922 HCLGE_SSU_MEM_ECC_ERR(22), 923 HCLGE_SSU_MEM_ECC_ERR(23), 924 HCLGE_SSU_MEM_ECC_ERR(24), 925 HCLGE_SSU_MEM_ECC_ERR(25), 926 HCLGE_SSU_MEM_ECC_ERR(26), 927 HCLGE_SSU_MEM_ECC_ERR(27), 928 HCLGE_SSU_MEM_ECC_ERR(28), 929 HCLGE_SSU_MEM_ECC_ERR(29), 930 HCLGE_SSU_MEM_ECC_ERR(30), 931 HCLGE_SSU_MEM_ECC_ERR(31), 932 { /* sentinel */ } 933 }; 934 935 static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { 936 { 937 .int_msk = BIT(0), 938 .msg = "roc_pkt_without_key_port", 939 .reset_level = HNAE3_FUNC_RESET 940 }, { 941 .int_msk = BIT(1), 942 .msg = "tpu_pkt_without_key_port", 943 .reset_level = HNAE3_GLOBAL_RESET 944 }, { 945 .int_msk = BIT(2), 946 .msg = "igu_pkt_without_key_port", 947 .reset_level = HNAE3_GLOBAL_RESET 948 }, { 949 .int_msk = BIT(3), 950 .msg = "roc_eof_mis_match_port", 951 .reset_level = HNAE3_GLOBAL_RESET 952 }, { 953 .int_msk = BIT(4), 954 .msg = "tpu_eof_mis_match_port", 955 .reset_level = HNAE3_GLOBAL_RESET 956 }, { 957 .int_msk = BIT(5), 958 .msg = "igu_eof_mis_match_port", 959 .reset_level = HNAE3_GLOBAL_RESET 960 }, { 961 .int_msk = BIT(6), 962 .msg = "roc_sof_mis_match_port", 963 .reset_level = HNAE3_GLOBAL_RESET 964 }, { 965 .int_msk = BIT(7), 966 .msg = "tpu_sof_mis_match_port", 967 .reset_level = HNAE3_GLOBAL_RESET 968 }, { 969 .int_msk = BIT(8), 970 .msg = "igu_sof_mis_match_port", 971 .reset_level = HNAE3_GLOBAL_RESET 972 }, { 973 .int_msk = BIT(11), 974 .msg = "ets_rd_int_rx_port", 975 .reset_level = HNAE3_GLOBAL_RESET 976 }, { 977 .int_msk = BIT(12), 978 .msg = "ets_wr_int_rx_port", 979 .reset_level = HNAE3_GLOBAL_RESET 980 }, { 981 .int_msk = BIT(13), 982 .msg = "ets_rd_int_tx_port", 983 .reset_level = HNAE3_GLOBAL_RESET 984 }, { 985 .int_msk = BIT(14), 986 .msg = "ets_wr_int_tx_port", 987 .reset_level = HNAE3_GLOBAL_RESET 988 }, { 989 /* sentinel */ 990 } 991 }; 992 993 static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = { 994 { 995 .int_msk = BIT(0), 996 .msg = "ig_mac_inf_int", 997 .reset_level = HNAE3_GLOBAL_RESET 998 }, { 999 .int_msk = BIT(1), 1000 .msg = "ig_host_inf_int", 1001 .reset_level = HNAE3_GLOBAL_RESET 1002 }, { 1003 .int_msk = BIT(2), 1004 .msg = "ig_roc_buf_int", 1005 .reset_level = HNAE3_GLOBAL_RESET 1006 }, { 1007 .int_msk = BIT(3), 1008 .msg = "ig_host_data_fifo_int", 1009 .reset_level = HNAE3_GLOBAL_RESET 1010 }, { 1011 .int_msk = BIT(4), 1012 .msg = "ig_host_key_fifo_int", 1013 .reset_level = HNAE3_GLOBAL_RESET 1014 }, { 1015 .int_msk = BIT(5), 1016 .msg = "tx_qcn_fifo_int", 1017 .reset_level = HNAE3_GLOBAL_RESET 1018 }, { 1019 .int_msk = BIT(6), 1020 .msg = "rx_qcn_fifo_int", 1021 .reset_level = HNAE3_GLOBAL_RESET 1022 }, { 1023 .int_msk = BIT(7), 1024 .msg = "tx_pf_rd_fifo_int", 1025 .reset_level = HNAE3_GLOBAL_RESET 1026 }, { 1027 .int_msk = BIT(8), 1028 .msg = "rx_pf_rd_fifo_int", 1029 .reset_level = HNAE3_GLOBAL_RESET 1030 }, { 1031 .int_msk = BIT(9), 1032 .msg = "qm_eof_fifo_int", 1033 .reset_level = HNAE3_GLOBAL_RESET 1034 }, { 1035 .int_msk = BIT(10), 1036 .msg = "mb_rlt_fifo_int", 1037 .reset_level = HNAE3_GLOBAL_RESET 1038 }, { 1039 .int_msk = BIT(11), 1040 .msg = "dup_uncopy_fifo_int", 1041 .reset_level = HNAE3_GLOBAL_RESET 1042 }, { 1043 .int_msk = BIT(12), 1044 .msg = "dup_cnt_rd_fifo_int", 1045 .reset_level = HNAE3_GLOBAL_RESET 1046 }, { 1047 .int_msk = BIT(13), 1048 .msg = "dup_cnt_drop_fifo_int", 1049 .reset_level = HNAE3_GLOBAL_RESET 1050 }, { 1051 .int_msk = BIT(14), 1052 .msg = "dup_cnt_wrb_fifo_int", 1053 .reset_level = HNAE3_GLOBAL_RESET 1054 }, { 1055 .int_msk = BIT(15), 1056 .msg = "host_cmd_fifo_int", 1057 .reset_level = HNAE3_GLOBAL_RESET 1058 }, { 1059 .int_msk = BIT(16), 1060 .msg = "mac_cmd_fifo_int", 1061 .reset_level = HNAE3_GLOBAL_RESET 1062 }, { 1063 .int_msk = BIT(17), 1064 .msg = "host_cmd_bitmap_empty_int", 1065 .reset_level = HNAE3_GLOBAL_RESET 1066 }, { 1067 .int_msk = BIT(18), 1068 .msg = "mac_cmd_bitmap_empty_int", 1069 .reset_level = HNAE3_GLOBAL_RESET 1070 }, { 1071 .int_msk = BIT(19), 1072 .msg = "dup_bitmap_empty_int", 1073 .reset_level = HNAE3_GLOBAL_RESET 1074 }, { 1075 .int_msk = BIT(20), 1076 .msg = "out_queue_bitmap_empty_int", 1077 .reset_level = HNAE3_GLOBAL_RESET 1078 }, { 1079 .int_msk = BIT(21), 1080 .msg = "bank2_bitmap_empty_int", 1081 .reset_level = HNAE3_GLOBAL_RESET 1082 }, { 1083 .int_msk = BIT(22), 1084 .msg = "bank1_bitmap_empty_int", 1085 .reset_level = HNAE3_GLOBAL_RESET 1086 }, { 1087 .int_msk = BIT(23), 1088 .msg = "bank0_bitmap_empty_int", 1089 .reset_level = HNAE3_GLOBAL_RESET 1090 }, { 1091 /* sentinel */ 1092 } 1093 }; 1094 1095 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { 1096 { 1097 .int_msk = BIT(0), 1098 .msg = "ets_rd_int_rx_tcg", 1099 .reset_level = HNAE3_GLOBAL_RESET 1100 }, { 1101 .int_msk = BIT(1), 1102 .msg = "ets_wr_int_rx_tcg", 1103 .reset_level = HNAE3_GLOBAL_RESET 1104 }, { 1105 .int_msk = BIT(2), 1106 .msg = "ets_rd_int_tx_tcg", 1107 .reset_level = HNAE3_GLOBAL_RESET 1108 }, { 1109 .int_msk = BIT(3), 1110 .msg = "ets_wr_int_tx_tcg", 1111 .reset_level = HNAE3_GLOBAL_RESET 1112 }, { 1113 /* sentinel */ 1114 } 1115 }; 1116 1117 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { 1118 { 1119 .int_msk = BIT(0), 1120 .msg = "roc_pkt_without_key_port", 1121 .reset_level = HNAE3_FUNC_RESET 1122 }, { 1123 .int_msk = BIT(9), 1124 .msg = "low_water_line_err_port", 1125 .reset_level = HNAE3_NONE_RESET 1126 }, { 1127 .int_msk = BIT(10), 1128 .msg = "hi_water_line_err_port", 1129 .reset_level = HNAE3_GLOBAL_RESET 1130 }, { 1131 /* sentinel */ 1132 } 1133 }; 1134 1135 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { 1136 { 1137 .int_msk = 0, 1138 .msg = "rocee qmm ovf: sgid invalid err" 1139 }, { 1140 .int_msk = 0x4, 1141 .msg = "rocee qmm ovf: sgid ovf err" 1142 }, { 1143 .int_msk = 0x8, 1144 .msg = "rocee qmm ovf: smac invalid err" 1145 }, { 1146 .int_msk = 0xC, 1147 .msg = "rocee qmm ovf: smac ovf err" 1148 }, { 1149 .int_msk = 0x10, 1150 .msg = "rocee qmm ovf: cqc invalid err" 1151 }, { 1152 .int_msk = 0x11, 1153 .msg = "rocee qmm ovf: cqc ovf err" 1154 }, { 1155 .int_msk = 0x12, 1156 .msg = "rocee qmm ovf: cqc hopnum err" 1157 }, { 1158 .int_msk = 0x13, 1159 .msg = "rocee qmm ovf: cqc ba0 err" 1160 }, { 1161 .int_msk = 0x14, 1162 .msg = "rocee qmm ovf: srqc invalid err" 1163 }, { 1164 .int_msk = 0x15, 1165 .msg = "rocee qmm ovf: srqc ovf err" 1166 }, { 1167 .int_msk = 0x16, 1168 .msg = "rocee qmm ovf: srqc hopnum err" 1169 }, { 1170 .int_msk = 0x17, 1171 .msg = "rocee qmm ovf: srqc ba0 err" 1172 }, { 1173 .int_msk = 0x18, 1174 .msg = "rocee qmm ovf: mpt invalid err" 1175 }, { 1176 .int_msk = 0x19, 1177 .msg = "rocee qmm ovf: mpt ovf err" 1178 }, { 1179 .int_msk = 0x1A, 1180 .msg = "rocee qmm ovf: mpt hopnum err" 1181 }, { 1182 .int_msk = 0x1B, 1183 .msg = "rocee qmm ovf: mpt ba0 err" 1184 }, { 1185 .int_msk = 0x1C, 1186 .msg = "rocee qmm ovf: qpc invalid err" 1187 }, { 1188 .int_msk = 0x1D, 1189 .msg = "rocee qmm ovf: qpc ovf err" 1190 }, { 1191 .int_msk = 0x1E, 1192 .msg = "rocee qmm ovf: qpc hopnum err" 1193 }, { 1194 .int_msk = 0x1F, 1195 .msg = "rocee qmm ovf: qpc ba0 err" 1196 }, { 1197 /* sentinel */ 1198 } 1199 }; 1200 1201 static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { 1202 { 1203 .module_id = MODULE_NONE, 1204 .msg = "MODULE_NONE" 1205 }, { 1206 .module_id = MODULE_BIOS_COMMON, 1207 .msg = "MODULE_BIOS_COMMON" 1208 }, { 1209 .module_id = MODULE_GE, 1210 .msg = "MODULE_GE" 1211 }, { 1212 .module_id = MODULE_IGU_EGU, 1213 .msg = "MODULE_IGU_EGU" 1214 }, { 1215 .module_id = MODULE_LGE, 1216 .msg = "MODULE_LGE" 1217 }, { 1218 .module_id = MODULE_NCSI, 1219 .msg = "MODULE_NCSI" 1220 }, { 1221 .module_id = MODULE_PPP, 1222 .msg = "MODULE_PPP" 1223 }, { 1224 .module_id = MODULE_QCN, 1225 .msg = "MODULE_QCN" 1226 }, { 1227 .module_id = MODULE_RCB_RX, 1228 .msg = "MODULE_RCB_RX" 1229 }, { 1230 .module_id = MODULE_RTC, 1231 .msg = "MODULE_RTC" 1232 }, { 1233 .module_id = MODULE_SSU, 1234 .msg = "MODULE_SSU" 1235 }, { 1236 .module_id = MODULE_TM, 1237 .msg = "MODULE_TM" 1238 }, { 1239 .module_id = MODULE_RCB_TX, 1240 .msg = "MODULE_RCB_TX" 1241 }, { 1242 .module_id = MODULE_TXDMA, 1243 .msg = "MODULE_TXDMA" 1244 }, { 1245 .module_id = MODULE_MASTER, 1246 .msg = "MODULE_MASTER" 1247 }, { 1248 .module_id = MODULE_HIMAC, 1249 .msg = "MODULE_HIMAC" 1250 }, { 1251 .module_id = MODULE_ROCEE_TOP, 1252 .msg = "MODULE_ROCEE_TOP" 1253 }, { 1254 .module_id = MODULE_ROCEE_TIMER, 1255 .msg = "MODULE_ROCEE_TIMER" 1256 }, { 1257 .module_id = MODULE_ROCEE_MDB, 1258 .msg = "MODULE_ROCEE_MDB" 1259 }, { 1260 .module_id = MODULE_ROCEE_TSP, 1261 .msg = "MODULE_ROCEE_TSP" 1262 }, { 1263 .module_id = MODULE_ROCEE_TRP, 1264 .msg = "MODULE_ROCEE_TRP" 1265 }, { 1266 .module_id = MODULE_ROCEE_SCC, 1267 .msg = "MODULE_ROCEE_SCC" 1268 }, { 1269 .module_id = MODULE_ROCEE_CAEP, 1270 .msg = "MODULE_ROCEE_CAEP" 1271 }, { 1272 .module_id = MODULE_ROCEE_GEN_AC, 1273 .msg = "MODULE_ROCEE_GEN_AC" 1274 }, { 1275 .module_id = MODULE_ROCEE_QMM, 1276 .msg = "MODULE_ROCEE_QMM" 1277 }, { 1278 .module_id = MODULE_ROCEE_LSAN, 1279 .msg = "MODULE_ROCEE_LSAN" 1280 } 1281 }; 1282 1283 static const struct hclge_hw_type_id hclge_hw_type_id_st[] = { 1284 { 1285 .type_id = NONE_ERROR, 1286 .msg = "none_error" 1287 }, { 1288 .type_id = FIFO_ERROR, 1289 .msg = "fifo_error" 1290 }, { 1291 .type_id = MEMORY_ERROR, 1292 .msg = "memory_error" 1293 }, { 1294 .type_id = POISON_ERROR, 1295 .msg = "poison_error" 1296 }, { 1297 .type_id = MSIX_ECC_ERROR, 1298 .msg = "msix_ecc_error" 1299 }, { 1300 .type_id = TQP_INT_ECC_ERROR, 1301 .msg = "tqp_int_ecc_error" 1302 }, { 1303 .type_id = PF_ABNORMAL_INT_ERROR, 1304 .msg = "pf_abnormal_int_error", 1305 .cause_by_vf = true 1306 }, { 1307 .type_id = MPF_ABNORMAL_INT_ERROR, 1308 .msg = "mpf_abnormal_int_error", 1309 .cause_by_vf = true 1310 }, { 1311 .type_id = COMMON_ERROR, 1312 .msg = "common_error" 1313 }, { 1314 .type_id = PORT_ERROR, 1315 .msg = "port_error" 1316 }, { 1317 .type_id = ETS_ERROR, 1318 .msg = "ets_error" 1319 }, { 1320 .type_id = NCSI_ERROR, 1321 .msg = "ncsi_error" 1322 }, { 1323 .type_id = GLB_ERROR, 1324 .msg = "glb_error" 1325 }, { 1326 .type_id = LINK_ERROR, 1327 .msg = "link_error" 1328 }, { 1329 .type_id = PTP_ERROR, 1330 .msg = "ptp_error" 1331 }, { 1332 .type_id = ROCEE_NORMAL_ERR, 1333 .msg = "rocee_normal_error" 1334 }, { 1335 .type_id = ROCEE_OVF_ERR, 1336 .msg = "rocee_ovf_error" 1337 }, { 1338 .type_id = ROCEE_BUS_ERR, 1339 .msg = "rocee_bus_error" 1340 }, 1341 }; 1342 1343 static void hclge_log_error(struct device *dev, char *reg, 1344 const struct hclge_hw_error *err, 1345 u32 err_sts, unsigned long *reset_requests) 1346 { 1347 while (err->msg) { 1348 if (err->int_msk & err_sts) { 1349 dev_err(dev, "%s %s found [error status=0x%x]\n", 1350 reg, err->msg, err_sts); 1351 if (err->reset_level && 1352 err->reset_level != HNAE3_NONE_RESET) 1353 set_bit(err->reset_level, reset_requests); 1354 } 1355 err++; 1356 } 1357 } 1358 1359 /* hclge_cmd_query_error: read the error information 1360 * @hdev: pointer to struct hclge_dev 1361 * @desc: descriptor for describing the command 1362 * @cmd: command opcode 1363 * @flag: flag for extended command structure 1364 * 1365 * This function query the error info from hw register/s using command 1366 */ 1367 static int hclge_cmd_query_error(struct hclge_dev *hdev, 1368 struct hclge_desc *desc, u32 cmd, u16 flag) 1369 { 1370 struct device *dev = &hdev->pdev->dev; 1371 int desc_num = 1; 1372 int ret; 1373 1374 hclge_cmd_setup_basic_desc(&desc[0], cmd, true); 1375 if (flag) { 1376 desc[0].flag |= cpu_to_le16(flag); 1377 hclge_cmd_setup_basic_desc(&desc[1], cmd, true); 1378 desc_num = 2; 1379 } 1380 1381 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 1382 if (ret) 1383 dev_err(dev, "query error cmd failed (%d)\n", ret); 1384 1385 return ret; 1386 } 1387 1388 static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev) 1389 { 1390 struct hclge_desc desc; 1391 1392 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false); 1393 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR); 1394 1395 return hclge_cmd_send(&hdev->hw, &desc, 1); 1396 } 1397 1398 static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en) 1399 { 1400 struct device *dev = &hdev->pdev->dev; 1401 struct hclge_desc desc[2]; 1402 int ret; 1403 1404 /* configure common error interrupts */ 1405 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); 1406 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1407 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); 1408 1409 if (en) { 1410 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); 1411 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | 1412 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); 1413 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); 1414 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN | 1415 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN); 1416 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); 1417 } 1418 1419 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); 1420 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | 1421 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); 1422 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); 1423 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK | 1424 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK); 1425 desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); 1426 1427 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1428 if (ret) 1429 dev_err(dev, 1430 "fail(%d) to configure common err interrupts\n", ret); 1431 1432 return ret; 1433 } 1434 1435 static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en) 1436 { 1437 struct device *dev = &hdev->pdev->dev; 1438 struct hclge_desc desc; 1439 int ret; 1440 1441 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) 1442 return 0; 1443 1444 /* configure NCSI error interrupts */ 1445 hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false); 1446 if (en) 1447 desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN); 1448 1449 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1450 if (ret) 1451 dev_err(dev, 1452 "fail(%d) to configure NCSI error interrupts\n", ret); 1453 1454 return ret; 1455 } 1456 1457 static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en) 1458 { 1459 struct device *dev = &hdev->pdev->dev; 1460 struct hclge_desc desc; 1461 int ret; 1462 1463 /* configure IGU,EGU error interrupts */ 1464 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false); 1465 desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE); 1466 if (en) 1467 desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN); 1468 1469 desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK); 1470 1471 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1472 if (ret) { 1473 dev_err(dev, 1474 "fail(%d) to configure IGU common interrupts\n", ret); 1475 return ret; 1476 } 1477 1478 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false); 1479 if (en) 1480 desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN); 1481 1482 desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK); 1483 1484 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1485 if (ret) { 1486 dev_err(dev, 1487 "fail(%d) to configure IGU-EGU TNL interrupts\n", ret); 1488 return ret; 1489 } 1490 1491 ret = hclge_config_ncsi_hw_err_int(hdev, en); 1492 1493 return ret; 1494 } 1495 1496 static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, 1497 bool en) 1498 { 1499 struct device *dev = &hdev->pdev->dev; 1500 struct hclge_desc desc[2]; 1501 int ret; 1502 1503 /* configure PPP error interrupts */ 1504 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1505 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1506 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 1507 1508 if (cmd == HCLGE_PPP_CMD0_INT_CMD) { 1509 if (en) { 1510 desc[0].data[0] = 1511 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN); 1512 desc[0].data[1] = 1513 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN); 1514 desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN); 1515 } 1516 1517 desc[1].data[0] = 1518 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK); 1519 desc[1].data[1] = 1520 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK); 1521 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 1522 desc[1].data[2] = 1523 cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK); 1524 } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { 1525 if (en) { 1526 desc[0].data[0] = 1527 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN); 1528 desc[0].data[1] = 1529 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN); 1530 } 1531 1532 desc[1].data[0] = 1533 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK); 1534 desc[1].data[1] = 1535 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK); 1536 } 1537 1538 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1539 if (ret) 1540 dev_err(dev, "fail(%d) to configure PPP error intr\n", ret); 1541 1542 return ret; 1543 } 1544 1545 static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en) 1546 { 1547 int ret; 1548 1549 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD, 1550 en); 1551 if (ret) 1552 return ret; 1553 1554 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD, 1555 en); 1556 1557 return ret; 1558 } 1559 1560 static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) 1561 { 1562 struct device *dev = &hdev->pdev->dev; 1563 struct hclge_desc desc; 1564 int ret; 1565 1566 /* configure TM SCH hw errors */ 1567 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false); 1568 if (en) 1569 desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN); 1570 1571 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1572 if (ret) { 1573 dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret); 1574 return ret; 1575 } 1576 1577 /* configure TM QCN hw errors */ 1578 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false); 1579 desc.data[0] = cpu_to_le32(HCLGE_TM_QCN_ERR_INT_TYPE); 1580 if (en) { 1581 desc.data[0] |= cpu_to_le32(HCLGE_TM_QCN_FIFO_INT_EN); 1582 desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); 1583 } 1584 1585 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1586 if (ret) 1587 dev_err(dev, 1588 "fail(%d) to configure TM QCN mem errors\n", ret); 1589 1590 return ret; 1591 } 1592 1593 static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en) 1594 { 1595 struct device *dev = &hdev->pdev->dev; 1596 struct hclge_desc desc; 1597 int ret; 1598 1599 /* configure MAC common error interrupts */ 1600 hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false); 1601 if (en) 1602 desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN); 1603 1604 desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK); 1605 1606 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1607 if (ret) 1608 dev_err(dev, 1609 "fail(%d) to configure MAC COMMON error intr\n", ret); 1610 1611 return ret; 1612 } 1613 1614 int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en) 1615 { 1616 struct hclge_desc desc; 1617 1618 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false); 1619 if (en) 1620 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN); 1621 else 1622 desc.data[0] = 0; 1623 1624 desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK); 1625 1626 return hclge_cmd_send(&hdev->hw, &desc, 1); 1627 } 1628 1629 static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, 1630 bool en) 1631 { 1632 struct device *dev = &hdev->pdev->dev; 1633 struct hclge_desc desc[2]; 1634 int desc_num = 1; 1635 int ret; 1636 1637 /* configure PPU error interrupts */ 1638 if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) { 1639 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1640 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1641 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 1642 if (en) { 1643 desc[0].data[0] = 1644 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN); 1645 desc[0].data[1] = 1646 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN); 1647 desc[1].data[3] = 1648 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN); 1649 desc[1].data[4] = 1650 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN); 1651 } 1652 1653 desc[1].data[0] = 1654 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK); 1655 desc[1].data[1] = 1656 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK); 1657 desc[1].data[2] = 1658 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK); 1659 desc[1].data[3] |= 1660 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK); 1661 desc_num = 2; 1662 } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) { 1663 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1664 if (en) 1665 desc[0].data[0] = 1666 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2); 1667 1668 desc[0].data[2] = 1669 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK); 1670 } else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) { 1671 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1672 if (en) 1673 desc[0].data[0] = 1674 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN); 1675 1676 desc[0].data[2] = 1677 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK); 1678 } else { 1679 dev_err(dev, "Invalid cmd to configure PPU error interrupts\n"); 1680 return -EINVAL; 1681 } 1682 1683 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 1684 1685 return ret; 1686 } 1687 1688 static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en) 1689 { 1690 struct device *dev = &hdev->pdev->dev; 1691 int ret; 1692 1693 ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD, 1694 en); 1695 if (ret) { 1696 dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n", 1697 ret); 1698 return ret; 1699 } 1700 1701 ret = hclge_config_ppu_error_interrupts(hdev, 1702 HCLGE_PPU_MPF_OTHER_INT_CMD, 1703 en); 1704 if (ret) { 1705 dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret); 1706 return ret; 1707 } 1708 1709 ret = hclge_config_ppu_error_interrupts(hdev, 1710 HCLGE_PPU_PF_OTHER_INT_CMD, en); 1711 if (ret) 1712 dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n", 1713 ret); 1714 return ret; 1715 } 1716 1717 static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) 1718 { 1719 struct device *dev = &hdev->pdev->dev; 1720 struct hclge_desc desc[2]; 1721 int ret; 1722 1723 /* configure SSU ecc error interrupts */ 1724 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false); 1725 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1726 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false); 1727 if (en) { 1728 desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN); 1729 desc[0].data[1] = 1730 cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN); 1731 desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN); 1732 } 1733 1734 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK); 1735 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK); 1736 desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK); 1737 1738 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1739 if (ret) { 1740 dev_err(dev, 1741 "fail(%d) to configure SSU ECC error interrupt\n", ret); 1742 return ret; 1743 } 1744 1745 /* configure SSU common error interrupts */ 1746 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false); 1747 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1748 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false); 1749 1750 if (en) { 1751 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 1752 desc[0].data[0] = 1753 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN); 1754 else 1755 desc[0].data[0] = 1756 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5)); 1757 desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN); 1758 desc[0].data[2] = 1759 cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN); 1760 } 1761 1762 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK | 1763 HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK); 1764 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK); 1765 1766 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1767 if (ret) 1768 dev_err(dev, 1769 "fail(%d) to configure SSU COMMON error intr\n", ret); 1770 1771 return ret; 1772 } 1773 1774 /* hclge_query_bd_num: query number of buffer descriptors 1775 * @hdev: pointer to struct hclge_dev 1776 * @is_ras: true for ras, false for msix 1777 * @mpf_bd_num: number of main PF interrupt buffer descriptors 1778 * @pf_bd_num: number of not main PF interrupt buffer descriptors 1779 * 1780 * This function querys number of mpf and pf buffer descriptors. 1781 */ 1782 static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, 1783 u32 *mpf_bd_num, u32 *pf_bd_num) 1784 { 1785 struct device *dev = &hdev->pdev->dev; 1786 u32 mpf_min_bd_num, pf_min_bd_num; 1787 enum hclge_opcode_type opcode; 1788 struct hclge_desc desc_bd; 1789 int ret; 1790 1791 if (is_ras) { 1792 opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM; 1793 mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM; 1794 pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM; 1795 } else { 1796 opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM; 1797 mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM; 1798 pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM; 1799 } 1800 1801 hclge_cmd_setup_basic_desc(&desc_bd, opcode, true); 1802 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 1803 if (ret) { 1804 dev_err(dev, "fail(%d) to query msix int status bd num\n", 1805 ret); 1806 return ret; 1807 } 1808 1809 *mpf_bd_num = le32_to_cpu(desc_bd.data[0]); 1810 *pf_bd_num = le32_to_cpu(desc_bd.data[1]); 1811 if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) { 1812 dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n", 1813 *mpf_bd_num, *pf_bd_num); 1814 return -EINVAL; 1815 } 1816 1817 return 0; 1818 } 1819 1820 /* hclge_handle_mpf_ras_error: handle all main PF RAS errors 1821 * @hdev: pointer to struct hclge_dev 1822 * @desc: descriptor for describing the command 1823 * @num: number of extended command structures 1824 * 1825 * This function handles all the main PF RAS errors in the 1826 * hw register/s using command. 1827 */ 1828 static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, 1829 struct hclge_desc *desc, 1830 int num) 1831 { 1832 struct hnae3_ae_dev *ae_dev = hdev->ae_dev; 1833 struct device *dev = &hdev->pdev->dev; 1834 __le32 *desc_data; 1835 u32 status; 1836 int ret; 1837 1838 /* query all main PF RAS errors */ 1839 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT, 1840 true); 1841 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 1842 if (ret) { 1843 dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret); 1844 return ret; 1845 } 1846 1847 /* log HNS common errors */ 1848 status = le32_to_cpu(desc[0].data[0]); 1849 if (status) 1850 hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", 1851 &hclge_imp_tcm_ecc_int[0], status, 1852 &ae_dev->hw_err_reset_req); 1853 1854 status = le32_to_cpu(desc[0].data[1]); 1855 if (status) 1856 hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", 1857 &hclge_cmdq_nic_mem_ecc_int[0], status, 1858 &ae_dev->hw_err_reset_req); 1859 1860 if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) 1861 dev_warn(dev, "imp_rd_data_poison_err found\n"); 1862 1863 status = le32_to_cpu(desc[0].data[3]); 1864 if (status) 1865 hclge_log_error(dev, "TQP_INT_ECC_INT_STS", 1866 &hclge_tqp_int_ecc_int[0], status, 1867 &ae_dev->hw_err_reset_req); 1868 1869 status = le32_to_cpu(desc[0].data[4]); 1870 if (status) 1871 hclge_log_error(dev, "MSIX_ECC_INT_STS", 1872 &hclge_msix_sram_ecc_int[0], status, 1873 &ae_dev->hw_err_reset_req); 1874 1875 /* log SSU(Storage Switch Unit) errors */ 1876 desc_data = (__le32 *)&desc[2]; 1877 status = le32_to_cpu(*(desc_data + 2)); 1878 if (status) 1879 hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", 1880 &hclge_ssu_mem_ecc_err_int[0], status, 1881 &ae_dev->hw_err_reset_req); 1882 1883 status = le32_to_cpu(*(desc_data + 3)) & BIT(0); 1884 if (status) { 1885 dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n", 1886 status); 1887 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); 1888 } 1889 1890 status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK; 1891 if (status) 1892 hclge_log_error(dev, "SSU_COMMON_ERR_INT", 1893 &hclge_ssu_com_err_int[0], status, 1894 &ae_dev->hw_err_reset_req); 1895 1896 /* log IGU(Ingress Unit) errors */ 1897 desc_data = (__le32 *)&desc[3]; 1898 status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK; 1899 if (status) 1900 hclge_log_error(dev, "IGU_INT_STS", 1901 &hclge_igu_int[0], status, 1902 &ae_dev->hw_err_reset_req); 1903 1904 /* log PPP(Programmable Packet Process) errors */ 1905 desc_data = (__le32 *)&desc[4]; 1906 status = le32_to_cpu(*(desc_data + 1)); 1907 if (status) 1908 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", 1909 &hclge_ppp_mpf_abnormal_int_st1[0], status, 1910 &ae_dev->hw_err_reset_req); 1911 1912 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK; 1913 if (status) 1914 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", 1915 &hclge_ppp_mpf_abnormal_int_st3[0], status, 1916 &ae_dev->hw_err_reset_req); 1917 1918 /* log PPU(RCB) errors */ 1919 desc_data = (__le32 *)&desc[5]; 1920 status = le32_to_cpu(*(desc_data + 1)); 1921 if (status) { 1922 dev_err(dev, 1923 "PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n"); 1924 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); 1925 } 1926 1927 status = le32_to_cpu(*(desc_data + 2)); 1928 if (status) 1929 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", 1930 &hclge_ppu_mpf_abnormal_int_st2[0], status, 1931 &ae_dev->hw_err_reset_req); 1932 1933 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK; 1934 if (status) 1935 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", 1936 &hclge_ppu_mpf_abnormal_int_st3[0], status, 1937 &ae_dev->hw_err_reset_req); 1938 1939 /* log TM(Traffic Manager) errors */ 1940 desc_data = (__le32 *)&desc[6]; 1941 status = le32_to_cpu(*desc_data); 1942 if (status) 1943 hclge_log_error(dev, "TM_SCH_RINT", 1944 &hclge_tm_sch_rint[0], status, 1945 &ae_dev->hw_err_reset_req); 1946 1947 /* log QCN(Quantized Congestion Control) errors */ 1948 desc_data = (__le32 *)&desc[7]; 1949 status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK; 1950 if (status) 1951 hclge_log_error(dev, "QCN_FIFO_RINT", 1952 &hclge_qcn_fifo_rint[0], status, 1953 &ae_dev->hw_err_reset_req); 1954 1955 status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK; 1956 if (status) 1957 hclge_log_error(dev, "QCN_ECC_RINT", 1958 &hclge_qcn_ecc_rint[0], status, 1959 &ae_dev->hw_err_reset_req); 1960 1961 /* log NCSI errors */ 1962 desc_data = (__le32 *)&desc[9]; 1963 status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK; 1964 if (status) 1965 hclge_log_error(dev, "NCSI_ECC_INT_RPT", 1966 &hclge_ncsi_err_int[0], status, 1967 &ae_dev->hw_err_reset_req); 1968 1969 /* clear all main PF RAS errors */ 1970 hclge_comm_cmd_reuse_desc(&desc[0], false); 1971 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 1972 if (ret) 1973 dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret); 1974 1975 return ret; 1976 } 1977 1978 /* hclge_handle_pf_ras_error: handle all PF RAS errors 1979 * @hdev: pointer to struct hclge_dev 1980 * @desc: descriptor for describing the command 1981 * @num: number of extended command structures 1982 * 1983 * This function handles all the PF RAS errors in the 1984 * hw registers using command. 1985 */ 1986 static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, 1987 struct hclge_desc *desc, 1988 int num) 1989 { 1990 struct hnae3_ae_dev *ae_dev = hdev->ae_dev; 1991 struct device *dev = &hdev->pdev->dev; 1992 __le32 *desc_data; 1993 u32 status; 1994 int ret; 1995 1996 /* query all PF RAS errors */ 1997 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT, 1998 true); 1999 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2000 if (ret) { 2001 dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret); 2002 return ret; 2003 } 2004 2005 /* log SSU(Storage Switch Unit) errors */ 2006 status = le32_to_cpu(desc[0].data[0]); 2007 if (status) 2008 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 2009 &hclge_ssu_port_based_err_int[0], status, 2010 &ae_dev->hw_err_reset_req); 2011 2012 status = le32_to_cpu(desc[0].data[1]); 2013 if (status) 2014 hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", 2015 &hclge_ssu_fifo_overflow_int[0], status, 2016 &ae_dev->hw_err_reset_req); 2017 2018 status = le32_to_cpu(desc[0].data[2]); 2019 if (status) 2020 hclge_log_error(dev, "SSU_ETS_TCG_INT", 2021 &hclge_ssu_ets_tcg_int[0], status, 2022 &ae_dev->hw_err_reset_req); 2023 2024 /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ 2025 desc_data = (__le32 *)&desc[1]; 2026 status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; 2027 if (status) 2028 hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", 2029 &hclge_igu_egu_tnl_int[0], status, 2030 &ae_dev->hw_err_reset_req); 2031 2032 /* log PPU(RCB) errors */ 2033 desc_data = (__le32 *)&desc[3]; 2034 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; 2035 if (status) { 2036 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", 2037 &hclge_ppu_pf_abnormal_int[0], status, 2038 &ae_dev->hw_err_reset_req); 2039 hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR); 2040 } 2041 2042 /* clear all PF RAS errors */ 2043 hclge_comm_cmd_reuse_desc(&desc[0], false); 2044 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2045 if (ret) 2046 dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret); 2047 2048 return ret; 2049 } 2050 2051 static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) 2052 { 2053 u32 mpf_bd_num, pf_bd_num, bd_num; 2054 struct hclge_desc *desc; 2055 int ret; 2056 2057 /* query the number of registers in the RAS int status */ 2058 ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num); 2059 if (ret) 2060 return ret; 2061 2062 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2063 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2064 if (!desc) 2065 return -ENOMEM; 2066 2067 /* handle all main PF RAS errors */ 2068 ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num); 2069 if (ret) { 2070 kfree(desc); 2071 return ret; 2072 } 2073 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 2074 2075 /* handle all PF RAS errors */ 2076 ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num); 2077 kfree(desc); 2078 2079 return ret; 2080 } 2081 2082 static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) 2083 { 2084 struct device *dev = &hdev->pdev->dev; 2085 struct hclge_desc desc[3]; 2086 int ret; 2087 2088 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2089 true); 2090 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2091 true); 2092 hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2093 true); 2094 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2095 desc[1].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2096 2097 ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); 2098 if (ret) { 2099 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); 2100 return ret; 2101 } 2102 2103 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", 2104 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2105 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2106 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2107 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", 2108 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), 2109 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), 2110 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); 2111 dev_err(dev, "AXI3: %08X %08X %08X %08X\n", 2112 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]), 2113 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); 2114 2115 return 0; 2116 } 2117 2118 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) 2119 { 2120 struct device *dev = &hdev->pdev->dev; 2121 struct hclge_desc desc[2]; 2122 int ret; 2123 2124 ret = hclge_cmd_query_error(hdev, &desc[0], 2125 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, 2126 HCLGE_COMM_CMD_FLAG_NEXT); 2127 if (ret) { 2128 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); 2129 return ret; 2130 } 2131 2132 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", 2133 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2134 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2135 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2136 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), 2137 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); 2138 2139 return 0; 2140 } 2141 2142 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) 2143 { 2144 struct device *dev = &hdev->pdev->dev; 2145 struct hclge_desc desc[2]; 2146 int ret; 2147 2148 /* read overflow error status */ 2149 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD, 2150 0); 2151 if (ret) { 2152 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); 2153 return ret; 2154 } 2155 2156 /* log overflow error */ 2157 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2158 const struct hclge_hw_error *err; 2159 u32 err_sts; 2160 2161 err = &hclge_rocee_qmm_ovf_err_int[0]; 2162 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK & 2163 le32_to_cpu(desc[0].data[0]); 2164 while (err->msg) { 2165 if (err->int_msk == err_sts) { 2166 dev_err(dev, "%s [error status=0x%x] found\n", 2167 err->msg, 2168 le32_to_cpu(desc[0].data[0])); 2169 break; 2170 } 2171 err++; 2172 } 2173 } 2174 2175 if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2176 dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n", 2177 le32_to_cpu(desc[0].data[1])); 2178 } 2179 2180 if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2181 dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n", 2182 le32_to_cpu(desc[0].data[2])); 2183 } 2184 2185 return 0; 2186 } 2187 2188 static enum hnae3_reset_type 2189 hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) 2190 { 2191 enum hnae3_reset_type reset_type = HNAE3_NONE_RESET; 2192 struct device *dev = &hdev->pdev->dev; 2193 struct hclge_desc desc[2]; 2194 unsigned int status; 2195 int ret; 2196 2197 /* read RAS error interrupt status */ 2198 ret = hclge_cmd_query_error(hdev, &desc[0], 2199 HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0); 2200 if (ret) { 2201 dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret); 2202 /* reset everything for now */ 2203 return HNAE3_GLOBAL_RESET; 2204 } 2205 2206 status = le32_to_cpu(desc[0].data[0]); 2207 if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { 2208 if (status & HCLGE_ROCEE_RERR_INT_MASK) 2209 dev_err(dev, "ROCEE RAS AXI rresp error\n"); 2210 2211 if (status & HCLGE_ROCEE_BERR_INT_MASK) 2212 dev_err(dev, "ROCEE RAS AXI bresp error\n"); 2213 2214 reset_type = HNAE3_FUNC_RESET; 2215 2216 hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR); 2217 2218 ret = hclge_log_rocee_axi_error(hdev); 2219 if (ret) 2220 return HNAE3_GLOBAL_RESET; 2221 } 2222 2223 if (status & HCLGE_ROCEE_ECC_INT_MASK) { 2224 dev_err(dev, "ROCEE RAS 2bit ECC error\n"); 2225 reset_type = HNAE3_GLOBAL_RESET; 2226 2227 ret = hclge_log_rocee_ecc_error(hdev); 2228 if (ret) 2229 return HNAE3_GLOBAL_RESET; 2230 } 2231 2232 if (status & HCLGE_ROCEE_OVF_INT_MASK) { 2233 ret = hclge_log_rocee_ovf_error(hdev); 2234 if (ret) { 2235 dev_err(dev, "failed(%d) to process ovf error\n", ret); 2236 /* reset everything for now */ 2237 return HNAE3_GLOBAL_RESET; 2238 } 2239 } 2240 2241 /* clear error status */ 2242 hclge_comm_cmd_reuse_desc(&desc[0], false); 2243 ret = hclge_cmd_send(&hdev->hw, &desc[0], 1); 2244 if (ret) { 2245 dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret); 2246 /* reset everything for now */ 2247 return HNAE3_GLOBAL_RESET; 2248 } 2249 2250 return reset_type; 2251 } 2252 2253 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) 2254 { 2255 struct device *dev = &hdev->pdev->dev; 2256 struct hclge_desc desc; 2257 int ret; 2258 2259 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 || 2260 !hnae3_dev_roce_supported(hdev)) 2261 return 0; 2262 2263 hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false); 2264 if (en) { 2265 /* enable ROCEE hw error interrupts */ 2266 desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN); 2267 desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN); 2268 2269 hclge_log_and_clear_rocee_ras_error(hdev); 2270 } 2271 desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK); 2272 desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK); 2273 2274 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2275 if (ret) 2276 dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret); 2277 2278 return ret; 2279 } 2280 2281 static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) 2282 { 2283 struct hclge_dev *hdev = ae_dev->priv; 2284 enum hnae3_reset_type reset_type; 2285 2286 if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) 2287 return; 2288 2289 reset_type = hclge_log_and_clear_rocee_ras_error(hdev); 2290 if (reset_type != HNAE3_NONE_RESET) 2291 set_bit(reset_type, &ae_dev->hw_err_reset_req); 2292 } 2293 2294 static const struct hclge_hw_blk hw_blk[] = { 2295 { 2296 .msk = BIT(0), 2297 .name = "IGU_EGU", 2298 .config_err_int = hclge_config_igu_egu_hw_err_int, 2299 }, { 2300 .msk = BIT(1), 2301 .name = "PPP", 2302 .config_err_int = hclge_config_ppp_hw_err_int, 2303 }, { 2304 .msk = BIT(2), 2305 .name = "SSU", 2306 .config_err_int = hclge_config_ssu_hw_err_int, 2307 }, { 2308 .msk = BIT(3), 2309 .name = "PPU", 2310 .config_err_int = hclge_config_ppu_hw_err_int, 2311 }, { 2312 .msk = BIT(4), 2313 .name = "TM", 2314 .config_err_int = hclge_config_tm_hw_err_int, 2315 }, { 2316 .msk = BIT(5), 2317 .name = "COMMON", 2318 .config_err_int = hclge_config_common_hw_err_int, 2319 }, { 2320 .msk = BIT(8), 2321 .name = "MAC", 2322 .config_err_int = hclge_config_mac_err_int, 2323 }, { 2324 /* sentinel */ 2325 } 2326 }; 2327 2328 static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable) 2329 { 2330 u32 reg_val; 2331 2332 reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG); 2333 2334 if (enable) 2335 reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2336 else 2337 reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2338 2339 hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val); 2340 } 2341 2342 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) 2343 { 2344 const struct hclge_hw_blk *module = hw_blk; 2345 int ret = 0; 2346 2347 hclge_config_all_msix_error(hdev, state); 2348 2349 while (module->name) { 2350 if (module->config_err_int) { 2351 ret = module->config_err_int(hdev, state); 2352 if (ret) 2353 return ret; 2354 } 2355 module++; 2356 } 2357 2358 return ret; 2359 } 2360 2361 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) 2362 { 2363 struct hclge_dev *hdev = ae_dev->priv; 2364 struct device *dev = &hdev->pdev->dev; 2365 u32 status; 2366 2367 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 2368 dev_err(dev, 2369 "Can't recover - RAS error reported during dev init\n"); 2370 return PCI_ERS_RESULT_NONE; 2371 } 2372 2373 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 2374 if (status & HCLGE_RAS_REG_NFE_MASK || 2375 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) 2376 ae_dev->hw_err_reset_req = 0; 2377 else 2378 goto out; 2379 2380 /* Handling Non-fatal HNS RAS errors */ 2381 if (status & HCLGE_RAS_REG_NFE_MASK) { 2382 dev_err(dev, 2383 "HNS Non-Fatal RAS error(status=0x%x) identified\n", 2384 status); 2385 hclge_handle_all_ras_errors(hdev); 2386 } 2387 2388 /* Handling Non-fatal Rocee RAS errors */ 2389 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 && 2390 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { 2391 dev_err(dev, "ROCEE Non-Fatal RAS error identified\n"); 2392 hclge_handle_rocee_ras_error(ae_dev); 2393 } 2394 2395 if (ae_dev->hw_err_reset_req) 2396 return PCI_ERS_RESULT_NEED_RESET; 2397 2398 out: 2399 return PCI_ERS_RESULT_RECOVERED; 2400 } 2401 2402 static int hclge_clear_hw_msix_error(struct hclge_dev *hdev, 2403 struct hclge_desc *desc, bool is_mpf, 2404 u32 bd_num) 2405 { 2406 if (is_mpf) 2407 desc[0].opcode = 2408 cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT); 2409 else 2410 desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT); 2411 2412 desc[0].flag = cpu_to_le16(HCLGE_COMM_CMD_FLAG_NO_INTR | 2413 HCLGE_COMM_CMD_FLAG_IN); 2414 2415 return hclge_cmd_send(&hdev->hw, &desc[0], bd_num); 2416 } 2417 2418 /* hclge_query_8bd_info: query information about over_8bd_nfe_err 2419 * @hdev: pointer to struct hclge_dev 2420 * @vf_id: Index of the virtual function with error 2421 * @q_id: Physical index of the queue with error 2422 * 2423 * This function get specific index of queue and function which causes 2424 * over_8bd_nfe_err by using command. If vf_id is 0, it means error is 2425 * caused by PF instead of VF. 2426 */ 2427 static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id, 2428 u16 *q_id) 2429 { 2430 struct hclge_query_ppu_pf_other_int_dfx_cmd *req; 2431 struct hclge_desc desc; 2432 int ret; 2433 2434 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true); 2435 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2436 if (ret) 2437 return ret; 2438 2439 req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data; 2440 *vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id); 2441 *q_id = le16_to_cpu(req->over_8bd_no_fe_qid); 2442 2443 return 0; 2444 } 2445 2446 /* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err 2447 * @hdev: pointer to struct hclge_dev 2448 * @reset_requests: reset level that we need to trigger later 2449 * 2450 * over_8bd_nfe_err is a special MSI-X because it may caused by a VF, in 2451 * that case, we need to trigger VF reset. Otherwise, a PF reset is needed. 2452 */ 2453 static void hclge_handle_over_8bd_err(struct hclge_dev *hdev, 2454 unsigned long *reset_requests) 2455 { 2456 struct device *dev = &hdev->pdev->dev; 2457 u16 vf_id; 2458 u16 q_id; 2459 int ret; 2460 2461 ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id); 2462 if (ret) { 2463 dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n", 2464 ret); 2465 return; 2466 } 2467 2468 dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vport(%u), queue_id(%u)\n", 2469 vf_id, q_id); 2470 2471 if (vf_id) { 2472 if (vf_id >= hdev->num_alloc_vport) { 2473 dev_err(dev, "invalid vport(%u)\n", vf_id); 2474 return; 2475 } 2476 2477 /* If we need to trigger other reset whose level is higher 2478 * than HNAE3_VF_FUNC_RESET, no need to trigger a VF reset 2479 * here. 2480 */ 2481 if (*reset_requests != 0) 2482 return; 2483 2484 ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]); 2485 if (ret) 2486 dev_err(dev, "inform reset to vport(%u) failed %d!\n", 2487 vf_id, ret); 2488 } else { 2489 set_bit(HNAE3_FUNC_RESET, reset_requests); 2490 } 2491 } 2492 2493 /* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors 2494 * @hdev: pointer to struct hclge_dev 2495 * @desc: descriptor for describing the command 2496 * @mpf_bd_num: number of extended command structures 2497 * @reset_requests: record of the reset level that we need 2498 * 2499 * This function handles all the main PF MSI-X errors in the hw register/s 2500 * using command. 2501 */ 2502 static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev, 2503 struct hclge_desc *desc, 2504 int mpf_bd_num, 2505 unsigned long *reset_requests) 2506 { 2507 struct device *dev = &hdev->pdev->dev; 2508 __le32 *desc_data; 2509 u32 status; 2510 int ret; 2511 /* query all main PF MSIx errors */ 2512 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, 2513 true); 2514 ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num); 2515 if (ret) { 2516 dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret); 2517 return ret; 2518 } 2519 2520 /* log MAC errors */ 2521 desc_data = (__le32 *)&desc[1]; 2522 status = le32_to_cpu(*desc_data); 2523 if (status) 2524 hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R", 2525 &hclge_mac_afifo_tnl_int[0], status, 2526 reset_requests); 2527 2528 /* log PPU(RCB) MPF errors */ 2529 desc_data = (__le32 *)&desc[5]; 2530 status = le32_to_cpu(*(desc_data + 2)) & 2531 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; 2532 if (status) 2533 dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x\n]", 2534 status); 2535 2536 /* clear all main PF MSIx errors */ 2537 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 2538 if (ret) 2539 dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret); 2540 2541 return ret; 2542 } 2543 2544 /* hclge_handle_pf_msix_error: handle all PF MSI-X errors 2545 * @hdev: pointer to struct hclge_dev 2546 * @desc: descriptor for describing the command 2547 * @mpf_bd_num: number of extended command structures 2548 * @reset_requests: record of the reset level that we need 2549 * 2550 * This function handles all the PF MSI-X errors in the hw register/s using 2551 * command. 2552 */ 2553 static int hclge_handle_pf_msix_error(struct hclge_dev *hdev, 2554 struct hclge_desc *desc, 2555 int pf_bd_num, 2556 unsigned long *reset_requests) 2557 { 2558 struct device *dev = &hdev->pdev->dev; 2559 __le32 *desc_data; 2560 u32 status; 2561 int ret; 2562 2563 /* query all PF MSIx errors */ 2564 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, 2565 true); 2566 ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); 2567 if (ret) { 2568 dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret); 2569 return ret; 2570 } 2571 2572 /* log SSU PF errors */ 2573 status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; 2574 if (status) 2575 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 2576 &hclge_ssu_port_based_pf_int[0], 2577 status, reset_requests); 2578 2579 /* read and log PPP PF errors */ 2580 desc_data = (__le32 *)&desc[2]; 2581 status = le32_to_cpu(*desc_data); 2582 if (status) 2583 hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", 2584 &hclge_ppp_pf_abnormal_int[0], 2585 status, reset_requests); 2586 2587 /* log PPU(RCB) PF errors */ 2588 desc_data = (__le32 *)&desc[3]; 2589 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; 2590 if (status) 2591 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", 2592 &hclge_ppu_pf_abnormal_int[0], 2593 status, reset_requests); 2594 2595 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK; 2596 if (status) 2597 hclge_handle_over_8bd_err(hdev, reset_requests); 2598 2599 /* clear all PF MSIx errors */ 2600 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 2601 if (ret) 2602 dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret); 2603 2604 return ret; 2605 } 2606 2607 static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, 2608 unsigned long *reset_requests) 2609 { 2610 u32 mpf_bd_num, pf_bd_num, bd_num; 2611 struct hclge_desc *desc; 2612 int ret; 2613 2614 /* query the number of bds for the MSIx int status */ 2615 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 2616 if (ret) 2617 goto out; 2618 2619 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2620 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2621 if (!desc) 2622 return -ENOMEM; 2623 2624 ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num, 2625 reset_requests); 2626 if (ret) 2627 goto msi_error; 2628 2629 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 2630 ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests); 2631 if (ret) 2632 goto msi_error; 2633 2634 ret = hclge_handle_mac_tnl(hdev); 2635 2636 msi_error: 2637 kfree(desc); 2638 out: 2639 return ret; 2640 } 2641 2642 int hclge_handle_hw_msix_error(struct hclge_dev *hdev, 2643 unsigned long *reset_requests) 2644 { 2645 struct device *dev = &hdev->pdev->dev; 2646 2647 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 2648 dev_err(dev, 2649 "failed to handle msix error during dev init\n"); 2650 return -EAGAIN; 2651 } 2652 2653 return hclge_handle_all_hw_msix_error(hdev, reset_requests); 2654 } 2655 2656 int hclge_handle_mac_tnl(struct hclge_dev *hdev) 2657 { 2658 struct hclge_mac_tnl_stats mac_tnl_stats; 2659 struct device *dev = &hdev->pdev->dev; 2660 struct hclge_desc desc; 2661 u32 status; 2662 int ret; 2663 2664 /* query and clear mac tnl interruptions */ 2665 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true); 2666 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2667 if (ret) { 2668 dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret); 2669 return ret; 2670 } 2671 2672 status = le32_to_cpu(desc.data[0]); 2673 if (status) { 2674 /* When mac tnl interrupt occurs, we record current time and 2675 * register status here in a fifo, then clear the status. So 2676 * that if link status changes suddenly at some time, we can 2677 * query them by debugfs. 2678 */ 2679 mac_tnl_stats.time = local_clock(); 2680 mac_tnl_stats.status = status; 2681 kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats); 2682 ret = hclge_clear_mac_tnl_int(hdev); 2683 if (ret) 2684 dev_err(dev, "failed to clear mac tnl int, ret = %d.\n", 2685 ret); 2686 } 2687 2688 return ret; 2689 } 2690 2691 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) 2692 { 2693 struct hclge_dev *hdev = ae_dev->priv; 2694 struct device *dev = &hdev->pdev->dev; 2695 u32 mpf_bd_num, pf_bd_num, bd_num; 2696 struct hclge_desc *desc; 2697 u32 status; 2698 int ret; 2699 2700 ae_dev->hw_err_reset_req = 0; 2701 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 2702 2703 /* query the number of bds for the MSIx int status */ 2704 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 2705 if (ret) 2706 return; 2707 2708 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2709 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2710 if (!desc) 2711 return; 2712 2713 /* Clear HNS hw errors reported through msix */ 2714 memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) - 2715 HCLGE_DESC_NO_DATA_LEN); 2716 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 2717 if (ret) { 2718 dev_err(dev, "fail(%d) to clear mpf msix int during init\n", 2719 ret); 2720 goto msi_error; 2721 } 2722 2723 memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) - 2724 HCLGE_DESC_NO_DATA_LEN); 2725 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 2726 if (ret) { 2727 dev_err(dev, "fail(%d) to clear pf msix int during init\n", 2728 ret); 2729 goto msi_error; 2730 } 2731 2732 /* Handle Non-fatal HNS RAS errors */ 2733 if (status & HCLGE_RAS_REG_NFE_MASK) { 2734 dev_err(dev, "HNS hw error(RAS) identified during init\n"); 2735 hclge_handle_all_ras_errors(hdev); 2736 } 2737 2738 msi_error: 2739 kfree(desc); 2740 } 2741 2742 bool hclge_find_error_source(struct hclge_dev *hdev) 2743 { 2744 u32 msix_src_flag, hw_err_src_flag; 2745 2746 msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) & 2747 HCLGE_VECTOR0_REG_MSIX_MASK; 2748 2749 hw_err_src_flag = hclge_read_dev(&hdev->hw, 2750 HCLGE_RAS_PF_OTHER_INT_STS_REG) & 2751 HCLGE_RAS_REG_ERR_MASK; 2752 2753 return msix_src_flag || hw_err_src_flag; 2754 } 2755 2756 void hclge_handle_occurred_error(struct hclge_dev *hdev) 2757 { 2758 struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); 2759 2760 if (hclge_find_error_source(hdev)) 2761 hclge_handle_error_info_log(ae_dev); 2762 } 2763 2764 static bool 2765 hclge_handle_error_type_reg_log(struct device *dev, 2766 struct hclge_mod_err_info *mod_info, 2767 struct hclge_type_reg_err_info *type_reg_info) 2768 { 2769 #define HCLGE_ERR_TYPE_MASK 0x7F 2770 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7 2771 2772 u8 mod_id, total_module, type_id, total_type, i, is_ras; 2773 u8 index_module = MODULE_NONE; 2774 u8 index_type = NONE_ERROR; 2775 bool cause_by_vf = false; 2776 2777 mod_id = mod_info->mod_id; 2778 type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK; 2779 is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET; 2780 2781 total_module = ARRAY_SIZE(hclge_hw_module_id_st); 2782 total_type = ARRAY_SIZE(hclge_hw_type_id_st); 2783 2784 for (i = 0; i < total_module; i++) { 2785 if (mod_id == hclge_hw_module_id_st[i].module_id) { 2786 index_module = i; 2787 break; 2788 } 2789 } 2790 2791 for (i = 0; i < total_type; i++) { 2792 if (type_id == hclge_hw_type_id_st[i].type_id) { 2793 index_type = i; 2794 cause_by_vf = hclge_hw_type_id_st[i].cause_by_vf; 2795 break; 2796 } 2797 } 2798 2799 if (index_module != MODULE_NONE && index_type != NONE_ERROR) 2800 dev_err(dev, 2801 "found %s %s, is %s error.\n", 2802 hclge_hw_module_id_st[index_module].msg, 2803 hclge_hw_type_id_st[index_type].msg, 2804 is_ras ? "ras" : "msix"); 2805 else 2806 dev_err(dev, 2807 "unknown module[%u] or type[%u].\n", mod_id, type_id); 2808 2809 dev_err(dev, "reg_value:\n"); 2810 for (i = 0; i < type_reg_info->reg_num; i++) 2811 dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]); 2812 2813 return cause_by_vf; 2814 } 2815 2816 static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, 2817 const u32 *buf, u32 buf_size) 2818 { 2819 struct hclge_type_reg_err_info *type_reg_info; 2820 struct hclge_dev *hdev = ae_dev->priv; 2821 struct device *dev = &hdev->pdev->dev; 2822 struct hclge_mod_err_info *mod_info; 2823 struct hclge_sum_err_info *sum_info; 2824 bool cause_by_vf = false; 2825 u8 mod_num, err_num, i; 2826 u32 offset = 0; 2827 2828 sum_info = (struct hclge_sum_err_info *)&buf[offset++]; 2829 if (sum_info->reset_type && 2830 sum_info->reset_type != HNAE3_NONE_RESET) 2831 set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req); 2832 mod_num = sum_info->mod_num; 2833 2834 while (mod_num--) { 2835 if (offset >= buf_size) { 2836 dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n", 2837 offset, buf_size); 2838 return; 2839 } 2840 mod_info = (struct hclge_mod_err_info *)&buf[offset++]; 2841 err_num = mod_info->err_num; 2842 2843 for (i = 0; i < err_num; i++) { 2844 if (offset >= buf_size) { 2845 dev_err(dev, 2846 "The offset(%u) exceeds buf size(%u).\n", 2847 offset, buf_size); 2848 return; 2849 } 2850 2851 type_reg_info = (struct hclge_type_reg_err_info *) 2852 &buf[offset++]; 2853 if (hclge_handle_error_type_reg_log(dev, mod_info, 2854 type_reg_info)) 2855 cause_by_vf = true; 2856 2857 offset += type_reg_info->reg_num; 2858 } 2859 } 2860 2861 if (hnae3_ae_dev_vf_fault_supported(hdev->ae_dev) && cause_by_vf) 2862 set_bit(HNAE3_VF_EXP_RESET, &ae_dev->hw_err_reset_req); 2863 } 2864 2865 static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num) 2866 { 2867 struct device *dev = &hdev->pdev->dev; 2868 struct hclge_desc desc_bd; 2869 int ret; 2870 2871 hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true); 2872 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 2873 if (ret) { 2874 dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret); 2875 return ret; 2876 } 2877 2878 *bd_num = le32_to_cpu(desc_bd.data[0]); 2879 if (!(*bd_num)) { 2880 dev_err(dev, "The value of bd_num is 0!\n"); 2881 return -EINVAL; 2882 } 2883 2884 return 0; 2885 } 2886 2887 static int hclge_query_all_err_info(struct hclge_dev *hdev, 2888 struct hclge_desc *desc, u32 bd_num) 2889 { 2890 struct device *dev = &hdev->pdev->dev; 2891 int ret; 2892 2893 hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true); 2894 ret = hclge_cmd_send(&hdev->hw, desc, bd_num); 2895 if (ret) 2896 dev_err(dev, "failed to query error info, ret = %d.\n", ret); 2897 2898 return ret; 2899 } 2900 2901 int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev) 2902 { 2903 u32 bd_num, desc_len, buf_len, buf_size, i; 2904 struct hclge_dev *hdev = ae_dev->priv; 2905 struct hclge_desc *desc; 2906 __le32 *desc_data; 2907 u32 *buf; 2908 int ret; 2909 2910 ret = hclge_query_all_err_bd_num(hdev, &bd_num); 2911 if (ret) 2912 goto out; 2913 2914 desc_len = bd_num * sizeof(struct hclge_desc); 2915 desc = kzalloc(desc_len, GFP_KERNEL); 2916 if (!desc) { 2917 ret = -ENOMEM; 2918 goto out; 2919 } 2920 2921 ret = hclge_query_all_err_info(hdev, desc, bd_num); 2922 if (ret) 2923 goto err_desc; 2924 2925 buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN; 2926 buf_size = buf_len / sizeof(u32); 2927 2928 desc_data = kzalloc(buf_len, GFP_KERNEL); 2929 if (!desc_data) { 2930 ret = -ENOMEM; 2931 goto err_desc; 2932 } 2933 2934 buf = kzalloc(buf_len, GFP_KERNEL); 2935 if (!buf) { 2936 ret = -ENOMEM; 2937 goto err_buf_alloc; 2938 } 2939 2940 memcpy(desc_data, &desc[0].data[0], buf_len); 2941 for (i = 0; i < buf_size; i++) 2942 buf[i] = le32_to_cpu(desc_data[i]); 2943 2944 hclge_handle_error_module_log(ae_dev, buf, buf_size); 2945 kfree(buf); 2946 2947 err_buf_alloc: 2948 kfree(desc_data); 2949 err_desc: 2950 kfree(desc); 2951 out: 2952 return ret; 2953 } 2954 2955 static bool hclge_reset_vf_in_bitmap(struct hclge_dev *hdev, 2956 unsigned long *bitmap) 2957 { 2958 struct hclge_vport *vport; 2959 bool exist_set = false; 2960 int func_id; 2961 int ret; 2962 2963 func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM); 2964 if (func_id == PF_VPORT_ID) 2965 return false; 2966 2967 while (func_id != HCLGE_VPORT_NUM) { 2968 vport = hclge_get_vf_vport(hdev, 2969 func_id - HCLGE_VF_VPORT_START_NUM); 2970 if (!vport) { 2971 dev_err(&hdev->pdev->dev, "invalid func id(%d)\n", 2972 func_id); 2973 return false; 2974 } 2975 2976 dev_info(&hdev->pdev->dev, "do function %d recovery.", func_id); 2977 2978 ret = hclge_reset_tqp(&vport->nic); 2979 if (ret) { 2980 dev_err(&hdev->pdev->dev, 2981 "failed to reset tqp, ret = %d.", ret); 2982 return false; 2983 } 2984 2985 ret = hclge_inform_vf_reset(vport, HNAE3_VF_FUNC_RESET); 2986 if (ret) { 2987 dev_err(&hdev->pdev->dev, 2988 "failed to reset func %d, ret = %d.", 2989 func_id, ret); 2990 return false; 2991 } 2992 2993 exist_set = true; 2994 clear_bit(func_id, bitmap); 2995 func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM); 2996 } 2997 2998 return exist_set; 2999 } 3000 3001 static void hclge_get_vf_fault_bitmap(struct hclge_desc *desc, 3002 unsigned long *bitmap) 3003 { 3004 #define HCLGE_FIR_FAULT_BYTES 24 3005 #define HCLGE_SEC_FAULT_BYTES 8 3006 3007 u8 *buff; 3008 3009 BUILD_BUG_ON(HCLGE_FIR_FAULT_BYTES + HCLGE_SEC_FAULT_BYTES != 3010 BITS_TO_BYTES(HCLGE_VPORT_NUM)); 3011 3012 memcpy(bitmap, desc[0].data, HCLGE_FIR_FAULT_BYTES); 3013 buff = (u8 *)bitmap + HCLGE_FIR_FAULT_BYTES; 3014 memcpy(buff, desc[1].data, HCLGE_SEC_FAULT_BYTES); 3015 } 3016 3017 int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev) 3018 { 3019 unsigned long vf_fault_bitmap[BITS_TO_LONGS(HCLGE_VPORT_NUM)]; 3020 struct hclge_desc desc[2]; 3021 bool cause_by_vf = false; 3022 int ret; 3023 3024 if (!test_and_clear_bit(HNAE3_VF_EXP_RESET, 3025 &hdev->ae_dev->hw_err_reset_req) || 3026 !hnae3_ae_dev_vf_fault_supported(hdev->ae_dev)) 3027 return 0; 3028 3029 hclge_comm_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_GET_QUEUE_ERR_VF, 3030 true); 3031 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 3032 hclge_comm_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_GET_QUEUE_ERR_VF, 3033 true); 3034 3035 ret = hclge_comm_cmd_send(&hdev->hw.hw, desc, 2); 3036 if (ret) { 3037 dev_err(&hdev->pdev->dev, 3038 "failed to get vf bitmap, ret = %d.\n", ret); 3039 return ret; 3040 } 3041 hclge_get_vf_fault_bitmap(desc, vf_fault_bitmap); 3042 3043 cause_by_vf = hclge_reset_vf_in_bitmap(hdev, vf_fault_bitmap); 3044 if (cause_by_vf) 3045 hdev->ae_dev->hw_err_reset_req = 0; 3046 3047 return 0; 3048 } 3049