1 // SPDX-License-Identifier: GPL-2.0+ 2 /* Copyright (c) 2016-2017 Hisilicon Limited. */ 3 4 #include <linux/sched/clock.h> 5 6 #include "hclge_err.h" 7 8 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = { 9 { 10 .int_msk = BIT(1), 11 .msg = "imp_itcm0_ecc_mbit_err", 12 .reset_level = HNAE3_NONE_RESET 13 }, { 14 .int_msk = BIT(3), 15 .msg = "imp_itcm1_ecc_mbit_err", 16 .reset_level = HNAE3_NONE_RESET 17 }, { 18 .int_msk = BIT(5), 19 .msg = "imp_itcm2_ecc_mbit_err", 20 .reset_level = HNAE3_NONE_RESET 21 }, { 22 .int_msk = BIT(7), 23 .msg = "imp_itcm3_ecc_mbit_err", 24 .reset_level = HNAE3_NONE_RESET 25 }, { 26 .int_msk = BIT(9), 27 .msg = "imp_dtcm0_mem0_ecc_mbit_err", 28 .reset_level = HNAE3_NONE_RESET 29 }, { 30 .int_msk = BIT(11), 31 .msg = "imp_dtcm0_mem1_ecc_mbit_err", 32 .reset_level = HNAE3_NONE_RESET 33 }, { 34 .int_msk = BIT(13), 35 .msg = "imp_dtcm1_mem0_ecc_mbit_err", 36 .reset_level = HNAE3_NONE_RESET 37 }, { 38 .int_msk = BIT(15), 39 .msg = "imp_dtcm1_mem1_ecc_mbit_err", 40 .reset_level = HNAE3_NONE_RESET 41 }, { 42 .int_msk = BIT(17), 43 .msg = "imp_itcm4_ecc_mbit_err", 44 .reset_level = HNAE3_NONE_RESET 45 }, { 46 /* sentinel */ 47 } 48 }; 49 50 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = { 51 { 52 .int_msk = BIT(1), 53 .msg = "cmdq_nic_rx_depth_ecc_mbit_err", 54 .reset_level = HNAE3_NONE_RESET 55 }, { 56 .int_msk = BIT(3), 57 .msg = "cmdq_nic_tx_depth_ecc_mbit_err", 58 .reset_level = HNAE3_NONE_RESET 59 }, { 60 .int_msk = BIT(5), 61 .msg = "cmdq_nic_rx_tail_ecc_mbit_err", 62 .reset_level = HNAE3_NONE_RESET 63 }, { 64 .int_msk = BIT(7), 65 .msg = "cmdq_nic_tx_tail_ecc_mbit_err", 66 .reset_level = HNAE3_NONE_RESET 67 }, { 68 .int_msk = BIT(9), 69 .msg = "cmdq_nic_rx_head_ecc_mbit_err", 70 .reset_level = HNAE3_NONE_RESET 71 }, { 72 .int_msk = BIT(11), 73 .msg = "cmdq_nic_tx_head_ecc_mbit_err", 74 .reset_level = HNAE3_NONE_RESET 75 }, { 76 .int_msk = BIT(13), 77 .msg = "cmdq_nic_rx_addr_ecc_mbit_err", 78 .reset_level = HNAE3_NONE_RESET 79 }, { 80 .int_msk = BIT(15), 81 .msg = "cmdq_nic_tx_addr_ecc_mbit_err", 82 .reset_level = HNAE3_NONE_RESET 83 }, { 84 .int_msk = BIT(17), 85 .msg = "cmdq_rocee_rx_depth_ecc_mbit_err", 86 .reset_level = HNAE3_NONE_RESET 87 }, { 88 .int_msk = BIT(19), 89 .msg = "cmdq_rocee_tx_depth_ecc_mbit_err", 90 .reset_level = HNAE3_NONE_RESET 91 }, { 92 .int_msk = BIT(21), 93 .msg = "cmdq_rocee_rx_tail_ecc_mbit_err", 94 .reset_level = HNAE3_NONE_RESET 95 }, { 96 .int_msk = BIT(23), 97 .msg = "cmdq_rocee_tx_tail_ecc_mbit_err", 98 .reset_level = HNAE3_NONE_RESET 99 }, { 100 .int_msk = BIT(25), 101 .msg = "cmdq_rocee_rx_head_ecc_mbit_err", 102 .reset_level = HNAE3_NONE_RESET 103 }, { 104 .int_msk = BIT(27), 105 .msg = "cmdq_rocee_tx_head_ecc_mbit_err", 106 .reset_level = HNAE3_NONE_RESET 107 }, { 108 .int_msk = BIT(29), 109 .msg = "cmdq_rocee_rx_addr_ecc_mbit_err", 110 .reset_level = HNAE3_NONE_RESET 111 }, { 112 .int_msk = BIT(31), 113 .msg = "cmdq_rocee_tx_addr_ecc_mbit_err", 114 .reset_level = HNAE3_NONE_RESET 115 }, { 116 /* sentinel */ 117 } 118 }; 119 120 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = { 121 { 122 .int_msk = BIT(6), 123 .msg = "tqp_int_cfg_even_ecc_mbit_err", 124 .reset_level = HNAE3_NONE_RESET 125 }, { 126 .int_msk = BIT(7), 127 .msg = "tqp_int_cfg_odd_ecc_mbit_err", 128 .reset_level = HNAE3_NONE_RESET 129 }, { 130 .int_msk = BIT(8), 131 .msg = "tqp_int_ctrl_even_ecc_mbit_err", 132 .reset_level = HNAE3_NONE_RESET 133 }, { 134 .int_msk = BIT(9), 135 .msg = "tqp_int_ctrl_odd_ecc_mbit_err", 136 .reset_level = HNAE3_NONE_RESET 137 }, { 138 .int_msk = BIT(10), 139 .msg = "tx_que_scan_int_ecc_mbit_err", 140 .reset_level = HNAE3_NONE_RESET 141 }, { 142 .int_msk = BIT(11), 143 .msg = "rx_que_scan_int_ecc_mbit_err", 144 .reset_level = HNAE3_NONE_RESET 145 }, { 146 /* sentinel */ 147 } 148 }; 149 150 static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = { 151 { 152 .int_msk = BIT(1), 153 .msg = "msix_nic_ecc_mbit_err", 154 .reset_level = HNAE3_NONE_RESET 155 }, { 156 .int_msk = BIT(3), 157 .msg = "msix_rocee_ecc_mbit_err", 158 .reset_level = HNAE3_NONE_RESET 159 }, { 160 /* sentinel */ 161 } 162 }; 163 164 static const struct hclge_hw_error hclge_igu_int[] = { 165 { 166 .int_msk = BIT(0), 167 .msg = "igu_rx_buf0_ecc_mbit_err", 168 .reset_level = HNAE3_GLOBAL_RESET 169 }, { 170 .int_msk = BIT(2), 171 .msg = "igu_rx_buf1_ecc_mbit_err", 172 .reset_level = HNAE3_GLOBAL_RESET 173 }, { 174 /* sentinel */ 175 } 176 }; 177 178 static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { 179 { 180 .int_msk = BIT(0), 181 .msg = "rx_buf_overflow", 182 .reset_level = HNAE3_GLOBAL_RESET 183 }, { 184 .int_msk = BIT(1), 185 .msg = "rx_stp_fifo_overflow", 186 .reset_level = HNAE3_GLOBAL_RESET 187 }, { 188 .int_msk = BIT(2), 189 .msg = "rx_stp_fifo_underflow", 190 .reset_level = HNAE3_GLOBAL_RESET 191 }, { 192 .int_msk = BIT(3), 193 .msg = "tx_buf_overflow", 194 .reset_level = HNAE3_GLOBAL_RESET 195 }, { 196 .int_msk = BIT(4), 197 .msg = "tx_buf_underrun", 198 .reset_level = HNAE3_GLOBAL_RESET 199 }, { 200 .int_msk = BIT(5), 201 .msg = "rx_stp_buf_overflow", 202 .reset_level = HNAE3_GLOBAL_RESET 203 }, { 204 /* sentinel */ 205 } 206 }; 207 208 static const struct hclge_hw_error hclge_ncsi_err_int[] = { 209 { 210 .int_msk = BIT(1), 211 .msg = "ncsi_tx_ecc_mbit_err", 212 .reset_level = HNAE3_NONE_RESET 213 }, { 214 /* sentinel */ 215 } 216 }; 217 218 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = { 219 { 220 .int_msk = BIT(0), 221 .msg = "vf_vlan_ad_mem_ecc_mbit_err", 222 .reset_level = HNAE3_GLOBAL_RESET 223 }, { 224 .int_msk = BIT(1), 225 .msg = "umv_mcast_group_mem_ecc_mbit_err", 226 .reset_level = HNAE3_GLOBAL_RESET 227 }, { 228 .int_msk = BIT(2), 229 .msg = "umv_key_mem0_ecc_mbit_err", 230 .reset_level = HNAE3_GLOBAL_RESET 231 }, { 232 .int_msk = BIT(3), 233 .msg = "umv_key_mem1_ecc_mbit_err", 234 .reset_level = HNAE3_GLOBAL_RESET 235 }, { 236 .int_msk = BIT(4), 237 .msg = "umv_key_mem2_ecc_mbit_err", 238 .reset_level = HNAE3_GLOBAL_RESET 239 }, { 240 .int_msk = BIT(5), 241 .msg = "umv_key_mem3_ecc_mbit_err", 242 .reset_level = HNAE3_GLOBAL_RESET 243 }, { 244 .int_msk = BIT(6), 245 .msg = "umv_ad_mem_ecc_mbit_err", 246 .reset_level = HNAE3_GLOBAL_RESET 247 }, { 248 .int_msk = BIT(7), 249 .msg = "rss_tc_mode_mem_ecc_mbit_err", 250 .reset_level = HNAE3_GLOBAL_RESET 251 }, { 252 .int_msk = BIT(8), 253 .msg = "rss_idt_mem0_ecc_mbit_err", 254 .reset_level = HNAE3_GLOBAL_RESET 255 }, { 256 .int_msk = BIT(9), 257 .msg = "rss_idt_mem1_ecc_mbit_err", 258 .reset_level = HNAE3_GLOBAL_RESET 259 }, { 260 .int_msk = BIT(10), 261 .msg = "rss_idt_mem2_ecc_mbit_err", 262 .reset_level = HNAE3_GLOBAL_RESET 263 }, { 264 .int_msk = BIT(11), 265 .msg = "rss_idt_mem3_ecc_mbit_err", 266 .reset_level = HNAE3_GLOBAL_RESET 267 }, { 268 .int_msk = BIT(12), 269 .msg = "rss_idt_mem4_ecc_mbit_err", 270 .reset_level = HNAE3_GLOBAL_RESET 271 }, { 272 .int_msk = BIT(13), 273 .msg = "rss_idt_mem5_ecc_mbit_err", 274 .reset_level = HNAE3_GLOBAL_RESET 275 }, { 276 .int_msk = BIT(14), 277 .msg = "rss_idt_mem6_ecc_mbit_err", 278 .reset_level = HNAE3_GLOBAL_RESET 279 }, { 280 .int_msk = BIT(15), 281 .msg = "rss_idt_mem7_ecc_mbit_err", 282 .reset_level = HNAE3_GLOBAL_RESET 283 }, { 284 .int_msk = BIT(16), 285 .msg = "rss_idt_mem8_ecc_mbit_err", 286 .reset_level = HNAE3_GLOBAL_RESET 287 }, { 288 .int_msk = BIT(17), 289 .msg = "rss_idt_mem9_ecc_mbit_err", 290 .reset_level = HNAE3_GLOBAL_RESET 291 }, { 292 .int_msk = BIT(18), 293 .msg = "rss_idt_mem10_ecc_mbit_err", 294 .reset_level = HNAE3_GLOBAL_RESET 295 }, { 296 .int_msk = BIT(19), 297 .msg = "rss_idt_mem11_ecc_mbit_err", 298 .reset_level = HNAE3_GLOBAL_RESET 299 }, { 300 .int_msk = BIT(20), 301 .msg = "rss_idt_mem12_ecc_mbit_err", 302 .reset_level = HNAE3_GLOBAL_RESET 303 }, { 304 .int_msk = BIT(21), 305 .msg = "rss_idt_mem13_ecc_mbit_err", 306 .reset_level = HNAE3_GLOBAL_RESET 307 }, { 308 .int_msk = BIT(22), 309 .msg = "rss_idt_mem14_ecc_mbit_err", 310 .reset_level = HNAE3_GLOBAL_RESET 311 }, { 312 .int_msk = BIT(23), 313 .msg = "rss_idt_mem15_ecc_mbit_err", 314 .reset_level = HNAE3_GLOBAL_RESET 315 }, { 316 .int_msk = BIT(24), 317 .msg = "port_vlan_mem_ecc_mbit_err", 318 .reset_level = HNAE3_GLOBAL_RESET 319 }, { 320 .int_msk = BIT(25), 321 .msg = "mcast_linear_table_mem_ecc_mbit_err", 322 .reset_level = HNAE3_GLOBAL_RESET 323 }, { 324 .int_msk = BIT(26), 325 .msg = "mcast_result_mem_ecc_mbit_err", 326 .reset_level = HNAE3_GLOBAL_RESET 327 }, { 328 .int_msk = BIT(27), 329 .msg = "flow_director_ad_mem0_ecc_mbit_err", 330 .reset_level = HNAE3_GLOBAL_RESET 331 }, { 332 .int_msk = BIT(28), 333 .msg = "flow_director_ad_mem1_ecc_mbit_err", 334 .reset_level = HNAE3_GLOBAL_RESET 335 }, { 336 .int_msk = BIT(29), 337 .msg = "rx_vlan_tag_memory_ecc_mbit_err", 338 .reset_level = HNAE3_GLOBAL_RESET 339 }, { 340 .int_msk = BIT(30), 341 .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err", 342 .reset_level = HNAE3_GLOBAL_RESET 343 }, { 344 /* sentinel */ 345 } 346 }; 347 348 static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = { 349 { 350 .int_msk = BIT(0), 351 .msg = "tx_vlan_tag_err", 352 .reset_level = HNAE3_NONE_RESET 353 }, { 354 .int_msk = BIT(1), 355 .msg = "rss_list_tc_unassigned_queue_err", 356 .reset_level = HNAE3_NONE_RESET 357 }, { 358 /* sentinel */ 359 } 360 }; 361 362 static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = { 363 { 364 .int_msk = BIT(0), 365 .msg = "hfs_fifo_mem_ecc_mbit_err", 366 .reset_level = HNAE3_GLOBAL_RESET 367 }, { 368 .int_msk = BIT(1), 369 .msg = "rslt_descr_fifo_mem_ecc_mbit_err", 370 .reset_level = HNAE3_GLOBAL_RESET 371 }, { 372 .int_msk = BIT(2), 373 .msg = "tx_vlan_tag_mem_ecc_mbit_err", 374 .reset_level = HNAE3_GLOBAL_RESET 375 }, { 376 .int_msk = BIT(3), 377 .msg = "FD_CN0_memory_ecc_mbit_err", 378 .reset_level = HNAE3_GLOBAL_RESET 379 }, { 380 .int_msk = BIT(4), 381 .msg = "FD_CN1_memory_ecc_mbit_err", 382 .reset_level = HNAE3_GLOBAL_RESET 383 }, { 384 .int_msk = BIT(5), 385 .msg = "GRO_AD_memory_ecc_mbit_err", 386 .reset_level = HNAE3_GLOBAL_RESET 387 }, { 388 /* sentinel */ 389 } 390 }; 391 392 static const struct hclge_hw_error hclge_tm_sch_rint[] = { 393 { 394 .int_msk = BIT(1), 395 .msg = "tm_sch_ecc_mbit_err", 396 .reset_level = HNAE3_GLOBAL_RESET 397 }, { 398 .int_msk = BIT(2), 399 .msg = "tm_sch_port_shap_sub_fifo_wr_err", 400 .reset_level = HNAE3_GLOBAL_RESET 401 }, { 402 .int_msk = BIT(3), 403 .msg = "tm_sch_port_shap_sub_fifo_rd_err", 404 .reset_level = HNAE3_GLOBAL_RESET 405 }, { 406 .int_msk = BIT(4), 407 .msg = "tm_sch_pg_pshap_sub_fifo_wr_err", 408 .reset_level = HNAE3_GLOBAL_RESET 409 }, { 410 .int_msk = BIT(5), 411 .msg = "tm_sch_pg_pshap_sub_fifo_rd_err", 412 .reset_level = HNAE3_GLOBAL_RESET 413 }, { 414 .int_msk = BIT(6), 415 .msg = "tm_sch_pg_cshap_sub_fifo_wr_err", 416 .reset_level = HNAE3_GLOBAL_RESET 417 }, { 418 .int_msk = BIT(7), 419 .msg = "tm_sch_pg_cshap_sub_fifo_rd_err", 420 .reset_level = HNAE3_GLOBAL_RESET 421 }, { 422 .int_msk = BIT(8), 423 .msg = "tm_sch_pri_pshap_sub_fifo_wr_err", 424 .reset_level = HNAE3_GLOBAL_RESET 425 }, { 426 .int_msk = BIT(9), 427 .msg = "tm_sch_pri_pshap_sub_fifo_rd_err", 428 .reset_level = HNAE3_GLOBAL_RESET 429 }, { 430 .int_msk = BIT(10), 431 .msg = "tm_sch_pri_cshap_sub_fifo_wr_err", 432 .reset_level = HNAE3_GLOBAL_RESET 433 }, { 434 .int_msk = BIT(11), 435 .msg = "tm_sch_pri_cshap_sub_fifo_rd_err", 436 .reset_level = HNAE3_GLOBAL_RESET 437 }, { 438 .int_msk = BIT(12), 439 .msg = "tm_sch_port_shap_offset_fifo_wr_err", 440 .reset_level = HNAE3_GLOBAL_RESET 441 }, { 442 .int_msk = BIT(13), 443 .msg = "tm_sch_port_shap_offset_fifo_rd_err", 444 .reset_level = HNAE3_GLOBAL_RESET 445 }, { 446 .int_msk = BIT(14), 447 .msg = "tm_sch_pg_pshap_offset_fifo_wr_err", 448 .reset_level = HNAE3_GLOBAL_RESET 449 }, { 450 .int_msk = BIT(15), 451 .msg = "tm_sch_pg_pshap_offset_fifo_rd_err", 452 .reset_level = HNAE3_GLOBAL_RESET 453 }, { 454 .int_msk = BIT(16), 455 .msg = "tm_sch_pg_cshap_offset_fifo_wr_err", 456 .reset_level = HNAE3_GLOBAL_RESET 457 }, { 458 .int_msk = BIT(17), 459 .msg = "tm_sch_pg_cshap_offset_fifo_rd_err", 460 .reset_level = HNAE3_GLOBAL_RESET 461 }, { 462 .int_msk = BIT(18), 463 .msg = "tm_sch_pri_pshap_offset_fifo_wr_err", 464 .reset_level = HNAE3_GLOBAL_RESET 465 }, { 466 .int_msk = BIT(19), 467 .msg = "tm_sch_pri_pshap_offset_fifo_rd_err", 468 .reset_level = HNAE3_GLOBAL_RESET 469 }, { 470 .int_msk = BIT(20), 471 .msg = "tm_sch_pri_cshap_offset_fifo_wr_err", 472 .reset_level = HNAE3_GLOBAL_RESET 473 }, { 474 .int_msk = BIT(21), 475 .msg = "tm_sch_pri_cshap_offset_fifo_rd_err", 476 .reset_level = HNAE3_GLOBAL_RESET 477 }, { 478 .int_msk = BIT(22), 479 .msg = "tm_sch_rq_fifo_wr_err", 480 .reset_level = HNAE3_GLOBAL_RESET 481 }, { 482 .int_msk = BIT(23), 483 .msg = "tm_sch_rq_fifo_rd_err", 484 .reset_level = HNAE3_GLOBAL_RESET 485 }, { 486 .int_msk = BIT(24), 487 .msg = "tm_sch_nq_fifo_wr_err", 488 .reset_level = HNAE3_GLOBAL_RESET 489 }, { 490 .int_msk = BIT(25), 491 .msg = "tm_sch_nq_fifo_rd_err", 492 .reset_level = HNAE3_GLOBAL_RESET 493 }, { 494 .int_msk = BIT(26), 495 .msg = "tm_sch_roce_up_fifo_wr_err", 496 .reset_level = HNAE3_GLOBAL_RESET 497 }, { 498 .int_msk = BIT(27), 499 .msg = "tm_sch_roce_up_fifo_rd_err", 500 .reset_level = HNAE3_GLOBAL_RESET 501 }, { 502 .int_msk = BIT(28), 503 .msg = "tm_sch_rcb_byte_fifo_wr_err", 504 .reset_level = HNAE3_GLOBAL_RESET 505 }, { 506 .int_msk = BIT(29), 507 .msg = "tm_sch_rcb_byte_fifo_rd_err", 508 .reset_level = HNAE3_GLOBAL_RESET 509 }, { 510 .int_msk = BIT(30), 511 .msg = "tm_sch_ssu_byte_fifo_wr_err", 512 .reset_level = HNAE3_GLOBAL_RESET 513 }, { 514 .int_msk = BIT(31), 515 .msg = "tm_sch_ssu_byte_fifo_rd_err", 516 .reset_level = HNAE3_GLOBAL_RESET 517 }, { 518 /* sentinel */ 519 } 520 }; 521 522 static const struct hclge_hw_error hclge_qcn_fifo_rint[] = { 523 { 524 .int_msk = BIT(0), 525 .msg = "qcn_shap_gp0_sch_fifo_rd_err", 526 .reset_level = HNAE3_GLOBAL_RESET 527 }, { 528 .int_msk = BIT(1), 529 .msg = "qcn_shap_gp0_sch_fifo_wr_err", 530 .reset_level = HNAE3_GLOBAL_RESET 531 }, { 532 .int_msk = BIT(2), 533 .msg = "qcn_shap_gp1_sch_fifo_rd_err", 534 .reset_level = HNAE3_GLOBAL_RESET 535 }, { 536 .int_msk = BIT(3), 537 .msg = "qcn_shap_gp1_sch_fifo_wr_err", 538 .reset_level = HNAE3_GLOBAL_RESET 539 }, { 540 .int_msk = BIT(4), 541 .msg = "qcn_shap_gp2_sch_fifo_rd_err", 542 .reset_level = HNAE3_GLOBAL_RESET 543 }, { 544 .int_msk = BIT(5), 545 .msg = "qcn_shap_gp2_sch_fifo_wr_err", 546 .reset_level = HNAE3_GLOBAL_RESET 547 }, { 548 .int_msk = BIT(6), 549 .msg = "qcn_shap_gp3_sch_fifo_rd_err", 550 .reset_level = HNAE3_GLOBAL_RESET 551 }, { 552 .int_msk = BIT(7), 553 .msg = "qcn_shap_gp3_sch_fifo_wr_err", 554 .reset_level = HNAE3_GLOBAL_RESET 555 }, { 556 .int_msk = BIT(8), 557 .msg = "qcn_shap_gp0_offset_fifo_rd_err", 558 .reset_level = HNAE3_GLOBAL_RESET 559 }, { 560 .int_msk = BIT(9), 561 .msg = "qcn_shap_gp0_offset_fifo_wr_err", 562 .reset_level = HNAE3_GLOBAL_RESET 563 }, { 564 .int_msk = BIT(10), 565 .msg = "qcn_shap_gp1_offset_fifo_rd_err", 566 .reset_level = HNAE3_GLOBAL_RESET 567 }, { 568 .int_msk = BIT(11), 569 .msg = "qcn_shap_gp1_offset_fifo_wr_err", 570 .reset_level = HNAE3_GLOBAL_RESET 571 }, { 572 .int_msk = BIT(12), 573 .msg = "qcn_shap_gp2_offset_fifo_rd_err", 574 .reset_level = HNAE3_GLOBAL_RESET 575 }, { 576 .int_msk = BIT(13), 577 .msg = "qcn_shap_gp2_offset_fifo_wr_err", 578 .reset_level = HNAE3_GLOBAL_RESET 579 }, { 580 .int_msk = BIT(14), 581 .msg = "qcn_shap_gp3_offset_fifo_rd_err", 582 .reset_level = HNAE3_GLOBAL_RESET 583 }, { 584 .int_msk = BIT(15), 585 .msg = "qcn_shap_gp3_offset_fifo_wr_err", 586 .reset_level = HNAE3_GLOBAL_RESET 587 }, { 588 .int_msk = BIT(16), 589 .msg = "qcn_byte_info_fifo_rd_err", 590 .reset_level = HNAE3_GLOBAL_RESET 591 }, { 592 .int_msk = BIT(17), 593 .msg = "qcn_byte_info_fifo_wr_err", 594 .reset_level = HNAE3_GLOBAL_RESET 595 }, { 596 /* sentinel */ 597 } 598 }; 599 600 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = { 601 { 602 .int_msk = BIT(1), 603 .msg = "qcn_byte_mem_ecc_mbit_err", 604 .reset_level = HNAE3_GLOBAL_RESET 605 }, { 606 .int_msk = BIT(3), 607 .msg = "qcn_time_mem_ecc_mbit_err", 608 .reset_level = HNAE3_GLOBAL_RESET 609 }, { 610 .int_msk = BIT(5), 611 .msg = "qcn_fb_mem_ecc_mbit_err", 612 .reset_level = HNAE3_GLOBAL_RESET 613 }, { 614 .int_msk = BIT(7), 615 .msg = "qcn_link_mem_ecc_mbit_err", 616 .reset_level = HNAE3_GLOBAL_RESET 617 }, { 618 .int_msk = BIT(9), 619 .msg = "qcn_rate_mem_ecc_mbit_err", 620 .reset_level = HNAE3_GLOBAL_RESET 621 }, { 622 .int_msk = BIT(11), 623 .msg = "qcn_tmplt_mem_ecc_mbit_err", 624 .reset_level = HNAE3_GLOBAL_RESET 625 }, { 626 .int_msk = BIT(13), 627 .msg = "qcn_shap_cfg_mem_ecc_mbit_err", 628 .reset_level = HNAE3_GLOBAL_RESET 629 }, { 630 .int_msk = BIT(15), 631 .msg = "qcn_gp0_barrel_mem_ecc_mbit_err", 632 .reset_level = HNAE3_GLOBAL_RESET 633 }, { 634 .int_msk = BIT(17), 635 .msg = "qcn_gp1_barrel_mem_ecc_mbit_err", 636 .reset_level = HNAE3_GLOBAL_RESET 637 }, { 638 .int_msk = BIT(19), 639 .msg = "qcn_gp2_barrel_mem_ecc_mbit_err", 640 .reset_level = HNAE3_GLOBAL_RESET 641 }, { 642 .int_msk = BIT(21), 643 .msg = "qcn_gp3_barral_mem_ecc_mbit_err", 644 .reset_level = HNAE3_GLOBAL_RESET 645 }, { 646 /* sentinel */ 647 } 648 }; 649 650 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { 651 { 652 .int_msk = BIT(0), 653 .msg = "egu_cge_afifo_ecc_1bit_err", 654 .reset_level = HNAE3_NONE_RESET 655 }, { 656 .int_msk = BIT(1), 657 .msg = "egu_cge_afifo_ecc_mbit_err", 658 .reset_level = HNAE3_GLOBAL_RESET 659 }, { 660 .int_msk = BIT(2), 661 .msg = "egu_lge_afifo_ecc_1bit_err", 662 .reset_level = HNAE3_NONE_RESET 663 }, { 664 .int_msk = BIT(3), 665 .msg = "egu_lge_afifo_ecc_mbit_err", 666 .reset_level = HNAE3_GLOBAL_RESET 667 }, { 668 .int_msk = BIT(4), 669 .msg = "cge_igu_afifo_ecc_1bit_err", 670 .reset_level = HNAE3_NONE_RESET 671 }, { 672 .int_msk = BIT(5), 673 .msg = "cge_igu_afifo_ecc_mbit_err", 674 .reset_level = HNAE3_GLOBAL_RESET 675 }, { 676 .int_msk = BIT(6), 677 .msg = "lge_igu_afifo_ecc_1bit_err", 678 .reset_level = HNAE3_NONE_RESET 679 }, { 680 .int_msk = BIT(7), 681 .msg = "lge_igu_afifo_ecc_mbit_err", 682 .reset_level = HNAE3_GLOBAL_RESET 683 }, { 684 .int_msk = BIT(8), 685 .msg = "cge_igu_afifo_overflow_err", 686 .reset_level = HNAE3_GLOBAL_RESET 687 }, { 688 .int_msk = BIT(9), 689 .msg = "lge_igu_afifo_overflow_err", 690 .reset_level = HNAE3_GLOBAL_RESET 691 }, { 692 .int_msk = BIT(10), 693 .msg = "egu_cge_afifo_underrun_err", 694 .reset_level = HNAE3_GLOBAL_RESET 695 }, { 696 .int_msk = BIT(11), 697 .msg = "egu_lge_afifo_underrun_err", 698 .reset_level = HNAE3_GLOBAL_RESET 699 }, { 700 .int_msk = BIT(12), 701 .msg = "egu_ge_afifo_underrun_err", 702 .reset_level = HNAE3_GLOBAL_RESET 703 }, { 704 .int_msk = BIT(13), 705 .msg = "ge_igu_afifo_overflow_err", 706 .reset_level = HNAE3_GLOBAL_RESET 707 }, { 708 /* sentinel */ 709 } 710 }; 711 712 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { 713 { 714 .int_msk = BIT(13), 715 .msg = "rpu_rx_pkt_bit32_ecc_mbit_err", 716 .reset_level = HNAE3_GLOBAL_RESET 717 }, { 718 .int_msk = BIT(14), 719 .msg = "rpu_rx_pkt_bit33_ecc_mbit_err", 720 .reset_level = HNAE3_GLOBAL_RESET 721 }, { 722 .int_msk = BIT(15), 723 .msg = "rpu_rx_pkt_bit34_ecc_mbit_err", 724 .reset_level = HNAE3_GLOBAL_RESET 725 }, { 726 .int_msk = BIT(16), 727 .msg = "rpu_rx_pkt_bit35_ecc_mbit_err", 728 .reset_level = HNAE3_GLOBAL_RESET 729 }, { 730 .int_msk = BIT(17), 731 .msg = "rcb_tx_ring_ecc_mbit_err", 732 .reset_level = HNAE3_GLOBAL_RESET 733 }, { 734 .int_msk = BIT(18), 735 .msg = "rcb_rx_ring_ecc_mbit_err", 736 .reset_level = HNAE3_GLOBAL_RESET 737 }, { 738 .int_msk = BIT(19), 739 .msg = "rcb_tx_fbd_ecc_mbit_err", 740 .reset_level = HNAE3_GLOBAL_RESET 741 }, { 742 .int_msk = BIT(20), 743 .msg = "rcb_rx_ebd_ecc_mbit_err", 744 .reset_level = HNAE3_GLOBAL_RESET 745 }, { 746 .int_msk = BIT(21), 747 .msg = "rcb_tso_info_ecc_mbit_err", 748 .reset_level = HNAE3_GLOBAL_RESET 749 }, { 750 .int_msk = BIT(22), 751 .msg = "rcb_tx_int_info_ecc_mbit_err", 752 .reset_level = HNAE3_GLOBAL_RESET 753 }, { 754 .int_msk = BIT(23), 755 .msg = "rcb_rx_int_info_ecc_mbit_err", 756 .reset_level = HNAE3_GLOBAL_RESET 757 }, { 758 .int_msk = BIT(24), 759 .msg = "tpu_tx_pkt_0_ecc_mbit_err", 760 .reset_level = HNAE3_GLOBAL_RESET 761 }, { 762 .int_msk = BIT(25), 763 .msg = "tpu_tx_pkt_1_ecc_mbit_err", 764 .reset_level = HNAE3_GLOBAL_RESET 765 }, { 766 .int_msk = BIT(26), 767 .msg = "rd_bus_err", 768 .reset_level = HNAE3_GLOBAL_RESET 769 }, { 770 .int_msk = BIT(27), 771 .msg = "wr_bus_err", 772 .reset_level = HNAE3_GLOBAL_RESET 773 }, { 774 .int_msk = BIT(28), 775 .msg = "reg_search_miss", 776 .reset_level = HNAE3_GLOBAL_RESET 777 }, { 778 .int_msk = BIT(29), 779 .msg = "rx_q_search_miss", 780 .reset_level = HNAE3_NONE_RESET 781 }, { 782 .int_msk = BIT(30), 783 .msg = "ooo_ecc_err_detect", 784 .reset_level = HNAE3_NONE_RESET 785 }, { 786 .int_msk = BIT(31), 787 .msg = "ooo_ecc_err_multpl", 788 .reset_level = HNAE3_GLOBAL_RESET 789 }, { 790 /* sentinel */ 791 } 792 }; 793 794 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { 795 { 796 .int_msk = BIT(4), 797 .msg = "gro_bd_ecc_mbit_err", 798 .reset_level = HNAE3_GLOBAL_RESET 799 }, { 800 .int_msk = BIT(5), 801 .msg = "gro_context_ecc_mbit_err", 802 .reset_level = HNAE3_GLOBAL_RESET 803 }, { 804 .int_msk = BIT(6), 805 .msg = "rx_stash_cfg_ecc_mbit_err", 806 .reset_level = HNAE3_GLOBAL_RESET 807 }, { 808 .int_msk = BIT(7), 809 .msg = "axi_rd_fbd_ecc_mbit_err", 810 .reset_level = HNAE3_GLOBAL_RESET 811 }, { 812 /* sentinel */ 813 } 814 }; 815 816 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = { 817 { 818 .int_msk = BIT(0), 819 .msg = "over_8bd_no_fe", 820 .reset_level = HNAE3_FUNC_RESET 821 }, { 822 .int_msk = BIT(1), 823 .msg = "tso_mss_cmp_min_err", 824 .reset_level = HNAE3_NONE_RESET 825 }, { 826 .int_msk = BIT(2), 827 .msg = "tso_mss_cmp_max_err", 828 .reset_level = HNAE3_NONE_RESET 829 }, { 830 .int_msk = BIT(3), 831 .msg = "tx_rd_fbd_poison", 832 .reset_level = HNAE3_FUNC_RESET 833 }, { 834 .int_msk = BIT(4), 835 .msg = "rx_rd_ebd_poison", 836 .reset_level = HNAE3_FUNC_RESET 837 }, { 838 .int_msk = BIT(5), 839 .msg = "buf_wait_timeout", 840 .reset_level = HNAE3_NONE_RESET 841 }, { 842 /* sentinel */ 843 } 844 }; 845 846 static const struct hclge_hw_error hclge_ssu_com_err_int[] = { 847 { 848 .int_msk = BIT(0), 849 .msg = "buf_sum_err", 850 .reset_level = HNAE3_NONE_RESET 851 }, { 852 .int_msk = BIT(1), 853 .msg = "ppp_mb_num_err", 854 .reset_level = HNAE3_NONE_RESET 855 }, { 856 .int_msk = BIT(2), 857 .msg = "ppp_mbid_err", 858 .reset_level = HNAE3_GLOBAL_RESET 859 }, { 860 .int_msk = BIT(3), 861 .msg = "ppp_rlt_mac_err", 862 .reset_level = HNAE3_GLOBAL_RESET 863 }, { 864 .int_msk = BIT(4), 865 .msg = "ppp_rlt_host_err", 866 .reset_level = HNAE3_GLOBAL_RESET 867 }, { 868 .int_msk = BIT(5), 869 .msg = "cks_edit_position_err", 870 .reset_level = HNAE3_GLOBAL_RESET 871 }, { 872 .int_msk = BIT(6), 873 .msg = "cks_edit_condition_err", 874 .reset_level = HNAE3_GLOBAL_RESET 875 }, { 876 .int_msk = BIT(7), 877 .msg = "vlan_edit_condition_err", 878 .reset_level = HNAE3_GLOBAL_RESET 879 }, { 880 .int_msk = BIT(8), 881 .msg = "vlan_num_ot_err", 882 .reset_level = HNAE3_GLOBAL_RESET 883 }, { 884 .int_msk = BIT(9), 885 .msg = "vlan_num_in_err", 886 .reset_level = HNAE3_GLOBAL_RESET 887 }, { 888 /* sentinel */ 889 } 890 }; 891 892 #define HCLGE_SSU_MEM_ECC_ERR(x) \ 893 { \ 894 .int_msk = BIT(x), \ 895 .msg = "ssu_mem" #x "_ecc_mbit_err", \ 896 .reset_level = HNAE3_GLOBAL_RESET \ 897 } 898 899 static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = { 900 HCLGE_SSU_MEM_ECC_ERR(0), 901 HCLGE_SSU_MEM_ECC_ERR(1), 902 HCLGE_SSU_MEM_ECC_ERR(2), 903 HCLGE_SSU_MEM_ECC_ERR(3), 904 HCLGE_SSU_MEM_ECC_ERR(4), 905 HCLGE_SSU_MEM_ECC_ERR(5), 906 HCLGE_SSU_MEM_ECC_ERR(6), 907 HCLGE_SSU_MEM_ECC_ERR(7), 908 HCLGE_SSU_MEM_ECC_ERR(8), 909 HCLGE_SSU_MEM_ECC_ERR(9), 910 HCLGE_SSU_MEM_ECC_ERR(10), 911 HCLGE_SSU_MEM_ECC_ERR(11), 912 HCLGE_SSU_MEM_ECC_ERR(12), 913 HCLGE_SSU_MEM_ECC_ERR(13), 914 HCLGE_SSU_MEM_ECC_ERR(14), 915 HCLGE_SSU_MEM_ECC_ERR(15), 916 HCLGE_SSU_MEM_ECC_ERR(16), 917 HCLGE_SSU_MEM_ECC_ERR(17), 918 HCLGE_SSU_MEM_ECC_ERR(18), 919 HCLGE_SSU_MEM_ECC_ERR(19), 920 HCLGE_SSU_MEM_ECC_ERR(20), 921 HCLGE_SSU_MEM_ECC_ERR(21), 922 HCLGE_SSU_MEM_ECC_ERR(22), 923 HCLGE_SSU_MEM_ECC_ERR(23), 924 HCLGE_SSU_MEM_ECC_ERR(24), 925 HCLGE_SSU_MEM_ECC_ERR(25), 926 HCLGE_SSU_MEM_ECC_ERR(26), 927 HCLGE_SSU_MEM_ECC_ERR(27), 928 HCLGE_SSU_MEM_ECC_ERR(28), 929 HCLGE_SSU_MEM_ECC_ERR(29), 930 HCLGE_SSU_MEM_ECC_ERR(30), 931 HCLGE_SSU_MEM_ECC_ERR(31), 932 { /* sentinel */ } 933 }; 934 935 static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = { 936 { 937 .int_msk = BIT(0), 938 .msg = "roc_pkt_without_key_port", 939 .reset_level = HNAE3_FUNC_RESET 940 }, { 941 .int_msk = BIT(1), 942 .msg = "tpu_pkt_without_key_port", 943 .reset_level = HNAE3_GLOBAL_RESET 944 }, { 945 .int_msk = BIT(2), 946 .msg = "igu_pkt_without_key_port", 947 .reset_level = HNAE3_GLOBAL_RESET 948 }, { 949 .int_msk = BIT(3), 950 .msg = "roc_eof_mis_match_port", 951 .reset_level = HNAE3_GLOBAL_RESET 952 }, { 953 .int_msk = BIT(4), 954 .msg = "tpu_eof_mis_match_port", 955 .reset_level = HNAE3_GLOBAL_RESET 956 }, { 957 .int_msk = BIT(5), 958 .msg = "igu_eof_mis_match_port", 959 .reset_level = HNAE3_GLOBAL_RESET 960 }, { 961 .int_msk = BIT(6), 962 .msg = "roc_sof_mis_match_port", 963 .reset_level = HNAE3_GLOBAL_RESET 964 }, { 965 .int_msk = BIT(7), 966 .msg = "tpu_sof_mis_match_port", 967 .reset_level = HNAE3_GLOBAL_RESET 968 }, { 969 .int_msk = BIT(8), 970 .msg = "igu_sof_mis_match_port", 971 .reset_level = HNAE3_GLOBAL_RESET 972 }, { 973 .int_msk = BIT(11), 974 .msg = "ets_rd_int_rx_port", 975 .reset_level = HNAE3_GLOBAL_RESET 976 }, { 977 .int_msk = BIT(12), 978 .msg = "ets_wr_int_rx_port", 979 .reset_level = HNAE3_GLOBAL_RESET 980 }, { 981 .int_msk = BIT(13), 982 .msg = "ets_rd_int_tx_port", 983 .reset_level = HNAE3_GLOBAL_RESET 984 }, { 985 .int_msk = BIT(14), 986 .msg = "ets_wr_int_tx_port", 987 .reset_level = HNAE3_GLOBAL_RESET 988 }, { 989 /* sentinel */ 990 } 991 }; 992 993 static const struct hclge_hw_error hclge_ssu_fifo_overflow_int[] = { 994 { 995 .int_msk = BIT(0), 996 .msg = "ig_mac_inf_int", 997 .reset_level = HNAE3_GLOBAL_RESET 998 }, { 999 .int_msk = BIT(1), 1000 .msg = "ig_host_inf_int", 1001 .reset_level = HNAE3_GLOBAL_RESET 1002 }, { 1003 .int_msk = BIT(2), 1004 .msg = "ig_roc_buf_int", 1005 .reset_level = HNAE3_GLOBAL_RESET 1006 }, { 1007 .int_msk = BIT(3), 1008 .msg = "ig_host_data_fifo_int", 1009 .reset_level = HNAE3_GLOBAL_RESET 1010 }, { 1011 .int_msk = BIT(4), 1012 .msg = "ig_host_key_fifo_int", 1013 .reset_level = HNAE3_GLOBAL_RESET 1014 }, { 1015 .int_msk = BIT(5), 1016 .msg = "tx_qcn_fifo_int", 1017 .reset_level = HNAE3_GLOBAL_RESET 1018 }, { 1019 .int_msk = BIT(6), 1020 .msg = "rx_qcn_fifo_int", 1021 .reset_level = HNAE3_GLOBAL_RESET 1022 }, { 1023 .int_msk = BIT(7), 1024 .msg = "tx_pf_rd_fifo_int", 1025 .reset_level = HNAE3_GLOBAL_RESET 1026 }, { 1027 .int_msk = BIT(8), 1028 .msg = "rx_pf_rd_fifo_int", 1029 .reset_level = HNAE3_GLOBAL_RESET 1030 }, { 1031 .int_msk = BIT(9), 1032 .msg = "qm_eof_fifo_int", 1033 .reset_level = HNAE3_GLOBAL_RESET 1034 }, { 1035 .int_msk = BIT(10), 1036 .msg = "mb_rlt_fifo_int", 1037 .reset_level = HNAE3_GLOBAL_RESET 1038 }, { 1039 .int_msk = BIT(11), 1040 .msg = "dup_uncopy_fifo_int", 1041 .reset_level = HNAE3_GLOBAL_RESET 1042 }, { 1043 .int_msk = BIT(12), 1044 .msg = "dup_cnt_rd_fifo_int", 1045 .reset_level = HNAE3_GLOBAL_RESET 1046 }, { 1047 .int_msk = BIT(13), 1048 .msg = "dup_cnt_drop_fifo_int", 1049 .reset_level = HNAE3_GLOBAL_RESET 1050 }, { 1051 .int_msk = BIT(14), 1052 .msg = "dup_cnt_wrb_fifo_int", 1053 .reset_level = HNAE3_GLOBAL_RESET 1054 }, { 1055 .int_msk = BIT(15), 1056 .msg = "host_cmd_fifo_int", 1057 .reset_level = HNAE3_GLOBAL_RESET 1058 }, { 1059 .int_msk = BIT(16), 1060 .msg = "mac_cmd_fifo_int", 1061 .reset_level = HNAE3_GLOBAL_RESET 1062 }, { 1063 .int_msk = BIT(17), 1064 .msg = "host_cmd_bitmap_empty_int", 1065 .reset_level = HNAE3_GLOBAL_RESET 1066 }, { 1067 .int_msk = BIT(18), 1068 .msg = "mac_cmd_bitmap_empty_int", 1069 .reset_level = HNAE3_GLOBAL_RESET 1070 }, { 1071 .int_msk = BIT(19), 1072 .msg = "dup_bitmap_empty_int", 1073 .reset_level = HNAE3_GLOBAL_RESET 1074 }, { 1075 .int_msk = BIT(20), 1076 .msg = "out_queue_bitmap_empty_int", 1077 .reset_level = HNAE3_GLOBAL_RESET 1078 }, { 1079 .int_msk = BIT(21), 1080 .msg = "bank2_bitmap_empty_int", 1081 .reset_level = HNAE3_GLOBAL_RESET 1082 }, { 1083 .int_msk = BIT(22), 1084 .msg = "bank1_bitmap_empty_int", 1085 .reset_level = HNAE3_GLOBAL_RESET 1086 }, { 1087 .int_msk = BIT(23), 1088 .msg = "bank0_bitmap_empty_int", 1089 .reset_level = HNAE3_GLOBAL_RESET 1090 }, { 1091 /* sentinel */ 1092 } 1093 }; 1094 1095 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { 1096 { 1097 .int_msk = BIT(0), 1098 .msg = "ets_rd_int_rx_tcg", 1099 .reset_level = HNAE3_GLOBAL_RESET 1100 }, { 1101 .int_msk = BIT(1), 1102 .msg = "ets_wr_int_rx_tcg", 1103 .reset_level = HNAE3_GLOBAL_RESET 1104 }, { 1105 .int_msk = BIT(2), 1106 .msg = "ets_rd_int_tx_tcg", 1107 .reset_level = HNAE3_GLOBAL_RESET 1108 }, { 1109 .int_msk = BIT(3), 1110 .msg = "ets_wr_int_tx_tcg", 1111 .reset_level = HNAE3_GLOBAL_RESET 1112 }, { 1113 /* sentinel */ 1114 } 1115 }; 1116 1117 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { 1118 { 1119 .int_msk = BIT(0), 1120 .msg = "roc_pkt_without_key_port", 1121 .reset_level = HNAE3_FUNC_RESET 1122 }, { 1123 .int_msk = BIT(9), 1124 .msg = "low_water_line_err_port", 1125 .reset_level = HNAE3_NONE_RESET 1126 }, { 1127 .int_msk = BIT(10), 1128 .msg = "hi_water_line_err_port", 1129 .reset_level = HNAE3_GLOBAL_RESET 1130 }, { 1131 /* sentinel */ 1132 } 1133 }; 1134 1135 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { 1136 { 1137 .int_msk = 0, 1138 .msg = "rocee qmm ovf: sgid invalid err" 1139 }, { 1140 .int_msk = 0x4, 1141 .msg = "rocee qmm ovf: sgid ovf err" 1142 }, { 1143 .int_msk = 0x8, 1144 .msg = "rocee qmm ovf: smac invalid err" 1145 }, { 1146 .int_msk = 0xC, 1147 .msg = "rocee qmm ovf: smac ovf err" 1148 }, { 1149 .int_msk = 0x10, 1150 .msg = "rocee qmm ovf: cqc invalid err" 1151 }, { 1152 .int_msk = 0x11, 1153 .msg = "rocee qmm ovf: cqc ovf err" 1154 }, { 1155 .int_msk = 0x12, 1156 .msg = "rocee qmm ovf: cqc hopnum err" 1157 }, { 1158 .int_msk = 0x13, 1159 .msg = "rocee qmm ovf: cqc ba0 err" 1160 }, { 1161 .int_msk = 0x14, 1162 .msg = "rocee qmm ovf: srqc invalid err" 1163 }, { 1164 .int_msk = 0x15, 1165 .msg = "rocee qmm ovf: srqc ovf err" 1166 }, { 1167 .int_msk = 0x16, 1168 .msg = "rocee qmm ovf: srqc hopnum err" 1169 }, { 1170 .int_msk = 0x17, 1171 .msg = "rocee qmm ovf: srqc ba0 err" 1172 }, { 1173 .int_msk = 0x18, 1174 .msg = "rocee qmm ovf: mpt invalid err" 1175 }, { 1176 .int_msk = 0x19, 1177 .msg = "rocee qmm ovf: mpt ovf err" 1178 }, { 1179 .int_msk = 0x1A, 1180 .msg = "rocee qmm ovf: mpt hopnum err" 1181 }, { 1182 .int_msk = 0x1B, 1183 .msg = "rocee qmm ovf: mpt ba0 err" 1184 }, { 1185 .int_msk = 0x1C, 1186 .msg = "rocee qmm ovf: qpc invalid err" 1187 }, { 1188 .int_msk = 0x1D, 1189 .msg = "rocee qmm ovf: qpc ovf err" 1190 }, { 1191 .int_msk = 0x1E, 1192 .msg = "rocee qmm ovf: qpc hopnum err" 1193 }, { 1194 .int_msk = 0x1F, 1195 .msg = "rocee qmm ovf: qpc ba0 err" 1196 }, { 1197 /* sentinel */ 1198 } 1199 }; 1200 1201 static const struct hclge_mod_reg_info hclge_ssu_reg_0_info[] = { 1202 { 1203 .reg_name = "SSU_BP_STATUS_0~5", 1204 .reg_offset_group = { 5, 6, 7, 8, 9, 10}, 1205 .group_size = 6 1206 }, { 1207 .reg_name = "LO_PRI_UNICAST_CUR_CNT", 1208 .reg_offset_group = {54}, 1209 .group_size = 1 1210 }, { 1211 .reg_name = "HI/LO_PRI_MULTICAST_CUR_CNT", 1212 .reg_offset_group = {55, 56}, 1213 .group_size = 2 1214 }, { 1215 .reg_name = "SSU_MB_RD_RLT_DROP_CNT", 1216 .reg_offset_group = {29}, 1217 .group_size = 1 1218 }, { 1219 .reg_name = "SSU_PPP_MAC_KEY_NUM", 1220 .reg_offset_group = {31, 30}, 1221 .group_size = 2 1222 }, { 1223 .reg_name = "SSU_PPP_HOST_KEY_NUM", 1224 .reg_offset_group = {33, 32}, 1225 .group_size = 2 1226 }, { 1227 .reg_name = "PPP_SSU_MAC/HOST_RLT_NUM", 1228 .reg_offset_group = {35, 34, 37, 36}, 1229 .group_size = 4 1230 }, { 1231 .reg_name = "FULL/PART_DROP_NUM", 1232 .reg_offset_group = {18, 19}, 1233 .group_size = 2 1234 }, { 1235 .reg_name = "PPP_KEY/RLT_DROP_NUM", 1236 .reg_offset_group = {20, 21}, 1237 .group_size = 2 1238 }, { 1239 .reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT", 1240 .reg_offset_group = {48, 49}, 1241 .group_size = 2 1242 }, { 1243 .reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT_RX", 1244 .reg_offset_group = {50, 51}, 1245 .group_size = 2 1246 }, 1247 }; 1248 1249 static const struct hclge_mod_reg_info hclge_ssu_reg_1_info[] = { 1250 { 1251 .reg_name = "RX_PACKET_IN/OUT_CNT", 1252 .reg_offset_group = {13, 12, 15, 14}, 1253 .group_size = 4 1254 }, { 1255 .reg_name = "TX_PACKET_IN/OUT_CNT", 1256 .reg_offset_group = {17, 16, 19, 18}, 1257 .group_size = 4 1258 }, { 1259 .reg_name = "RX_PACKET_TC0_IN/OUT_CNT", 1260 .reg_offset_group = {25, 24, 41, 40}, 1261 .group_size = 4 1262 }, { 1263 .reg_name = "RX_PACKET_TC1_IN/OUT_CNT", 1264 .reg_offset_group = {27, 26, 43, 42}, 1265 .group_size = 4 1266 }, { 1267 .reg_name = "RX_PACKET_TC2_IN/OUT_CNT", 1268 .reg_offset_group = {29, 28, 45, 44}, 1269 .group_size = 4 1270 }, { 1271 .reg_name = "RX_PACKET_TC3_IN/OUT_CNT", 1272 .reg_offset_group = {31, 30, 47, 46}, 1273 .group_size = 4 1274 }, { 1275 .reg_name = "RX_PACKET_TC4_IN/OUT_CNT", 1276 .reg_offset_group = {33, 32, 49, 48}, 1277 .group_size = 4 1278 }, { 1279 .reg_name = "RX_PACKET_TC5_IN/OUT_CNT", 1280 .reg_offset_group = {35, 34, 51, 50}, 1281 .group_size = 4 1282 }, { 1283 .reg_name = "RX_PACKET_TC6_IN/OUT_CNT", 1284 .reg_offset_group = {37, 36, 53, 52}, 1285 .group_size = 4 1286 }, { 1287 .reg_name = "RX_PACKET_TC7_IN/OUT_CNT", 1288 .reg_offset_group = {39, 38, 55, 54}, 1289 .group_size = 4 1290 }, { 1291 .reg_name = "TX_PACKET_TC0_IN/OUT_CNT", 1292 .reg_offset_group = {57, 56, 73, 72}, 1293 .group_size = 4 1294 }, { 1295 .reg_name = "TX_PACKET_TC1_IN/OUT_CNT", 1296 .reg_offset_group = {59, 58, 75, 74}, 1297 .group_size = 4 1298 }, { 1299 .reg_name = "TX_PACKET_TC2_IN/OUT_CNT", 1300 .reg_offset_group = {61, 60, 77, 76}, 1301 .group_size = 4 1302 }, { 1303 .reg_name = "TX_PACKET_TC3_IN/OUT_CNT", 1304 .reg_offset_group = {63, 62, 79, 78}, 1305 .group_size = 4 1306 }, { 1307 .reg_name = "TX_PACKET_TC4_IN/OUT_CNT", 1308 .reg_offset_group = {65, 64, 81, 80}, 1309 .group_size = 4 1310 }, { 1311 .reg_name = "TX_PACKET_TC5_IN/OUT_CNT", 1312 .reg_offset_group = {67, 66, 83, 82}, 1313 .group_size = 4 1314 }, { 1315 .reg_name = "TX_PACKET_TC6_IN/OUT_CNT", 1316 .reg_offset_group = {69, 68, 85, 84}, 1317 .group_size = 4 1318 }, { 1319 .reg_name = "TX_PACKET_TC7_IN/OUT_CNT", 1320 .reg_offset_group = {71, 70, 87, 86}, 1321 .group_size = 4 1322 }, { 1323 .reg_name = "PACKET_TC0~3_CURR_BUFFER_CNT", 1324 .reg_offset_group = {1, 2, 3, 4}, 1325 .group_size = 4 1326 }, { 1327 .reg_name = "PACKET_TC4~7_CURR_BUFFER_CNT", 1328 .reg_offset_group = {5, 6, 7, 8}, 1329 .group_size = 4 1330 }, { 1331 .reg_name = "ROC_RX_PACKET_IN_CNT", 1332 .reg_offset_group = {21, 20}, 1333 .group_size = 2 1334 }, { 1335 .reg_name = "ROC_TX_PACKET_OUT_CNT", 1336 .reg_offset_group = {23, 22}, 1337 .group_size = 2 1338 } 1339 }; 1340 1341 static const struct hclge_mod_reg_info hclge_rpu_reg_0_info[] = { 1342 { 1343 .reg_name = "RPU_FSM_DFX_ST0/ST1_TNL", 1344 .has_suffix = true, 1345 .reg_offset_group = {1, 2}, 1346 .group_size = 2 1347 }, { 1348 .reg_name = "RPU_RX_PKT_DROP_CNT_TNL", 1349 .has_suffix = true, 1350 .reg_offset_group = {3}, 1351 .group_size = 1 1352 } 1353 }; 1354 1355 static const struct hclge_mod_reg_info hclge_rpu_reg_1_info[] = { 1356 { 1357 .reg_name = "FIFO_DFX_ST0_1_2_4", 1358 .reg_offset_group = {1, 2, 3, 5}, 1359 .group_size = 4 1360 } 1361 }; 1362 1363 static const struct hclge_mod_reg_info hclge_igu_egu_reg_info[] = { 1364 { 1365 .reg_name = "IGU_RX_ERR_PKT", 1366 .reg_offset_group = {1}, 1367 .group_size = 1 1368 }, { 1369 .reg_name = "IGU_RX_OUT_ALL_PKT", 1370 .reg_offset_group = {29, 28}, 1371 .group_size = 2 1372 }, { 1373 .reg_name = "EGU_TX_OUT_ALL_PKT", 1374 .reg_offset_group = {39, 38}, 1375 .group_size = 2 1376 }, { 1377 .reg_name = "EGU_TX_ERR_PKT", 1378 .reg_offset_group = {5}, 1379 .group_size = 1 1380 } 1381 }; 1382 1383 static const struct hclge_mod_reg_info hclge_gen_reg_info_tnl[] = { 1384 { 1385 .reg_name = "SSU2RPU_TNL_WR_PKT_CNT_TNL", 1386 .has_suffix = true, 1387 .reg_offset_group = {1}, 1388 .group_size = 1 1389 }, { 1390 .reg_name = "RPU2HST_TNL_WR_PKT_CNT_TNL", 1391 .has_suffix = true, 1392 .reg_offset_group = {12}, 1393 .group_size = 1 1394 } 1395 }; 1396 1397 static const struct hclge_mod_reg_info hclge_gen_reg_info[] = { 1398 { 1399 .reg_name = "SSU_OVERSIZE_DROP_CNT", 1400 .reg_offset_group = {12}, 1401 .group_size = 1 1402 }, { 1403 .reg_name = "ROCE_RX_BYPASS_5NS_DROP_NUM", 1404 .reg_offset_group = {13}, 1405 .group_size = 1 1406 }, { 1407 .reg_name = "RX_PKT_IN/OUT_ERR_CNT", 1408 .reg_offset_group = {15, 14, 19, 18}, 1409 .group_size = 4 1410 }, { 1411 .reg_name = "TX_PKT_IN/OUT_ERR_CNT", 1412 .reg_offset_group = {17, 16, 21, 20}, 1413 .group_size = 4 1414 }, { 1415 .reg_name = "ETS_TC_READY", 1416 .reg_offset_group = {22}, 1417 .group_size = 1 1418 }, { 1419 .reg_name = "MIB_TX/RX_BAD_PKTS", 1420 .reg_offset_group = {19, 18, 29, 28}, 1421 .group_size = 4 1422 }, { 1423 .reg_name = "MIB_TX/RX_GOOD_PKTS", 1424 .reg_offset_group = {21, 20, 31, 30}, 1425 .group_size = 4 1426 }, { 1427 .reg_name = "MIB_TX/RX_TOTAL_PKTS", 1428 .reg_offset_group = {23, 22, 33, 32}, 1429 .group_size = 4 1430 }, { 1431 .reg_name = "MIB_TX/RX_PAUSE_PKTS", 1432 .reg_offset_group = {25, 24, 35, 34}, 1433 .group_size = 4 1434 }, { 1435 .reg_name = "MIB_TX_ERR_ALL_PKTS", 1436 .reg_offset_group = {27, 26}, 1437 .group_size = 2 1438 }, { 1439 .reg_name = "MIB_RX_FCS_ERR_PKTS", 1440 .reg_offset_group = {37, 36}, 1441 .group_size = 2 1442 }, { 1443 .reg_name = "IGU_EGU_AUTO_GATE_EN", 1444 .reg_offset_group = {42}, 1445 .group_size = 1 1446 }, { 1447 .reg_name = "IGU_EGU_INT_SRC", 1448 .reg_offset_group = {43}, 1449 .group_size = 1 1450 }, { 1451 .reg_name = "EGU_READY_NUM_CFG", 1452 .reg_offset_group = {44}, 1453 .group_size = 1 1454 }, { 1455 .reg_name = "IGU_EGU_TNL_DFX", 1456 .reg_offset_group = {45}, 1457 .group_size = 1 1458 }, { 1459 .reg_name = "TX_TNL_NOTE_PKT", 1460 .reg_offset_group = {46}, 1461 .group_size = 1 1462 } 1463 }; 1464 1465 static const struct hclge_mod_reg_common_msg hclge_ssu_reg_common_msg[] = { 1466 { 1467 .cmd = HCLGE_OPC_DFX_SSU_REG_0, 1468 .result_regs = hclge_ssu_reg_0_info, 1469 .bd_num = HCLGE_BD_NUM_SSU_REG_0, 1470 .result_regs_size = ARRAY_SIZE(hclge_ssu_reg_0_info) 1471 }, { 1472 .cmd = HCLGE_OPC_DFX_SSU_REG_1, 1473 .result_regs = hclge_ssu_reg_1_info, 1474 .bd_num = HCLGE_BD_NUM_SSU_REG_1, 1475 .result_regs_size = ARRAY_SIZE(hclge_ssu_reg_1_info) 1476 }, { 1477 .cmd = HCLGE_OPC_DFX_RPU_REG_0, 1478 .result_regs = hclge_rpu_reg_0_info, 1479 .bd_num = HCLGE_BD_NUM_RPU_REG_0, 1480 .result_regs_size = ARRAY_SIZE(hclge_rpu_reg_0_info), 1481 .need_para = true 1482 }, { 1483 .cmd = HCLGE_OPC_DFX_RPU_REG_1, 1484 .result_regs = hclge_rpu_reg_1_info, 1485 .bd_num = HCLGE_BD_NUM_RPU_REG_1, 1486 .result_regs_size = ARRAY_SIZE(hclge_rpu_reg_1_info) 1487 }, { 1488 .cmd = HCLGE_OPC_DFX_IGU_EGU_REG, 1489 .result_regs = hclge_igu_egu_reg_info, 1490 .bd_num = HCLGE_BD_NUM_IGU_EGU_REG, 1491 .result_regs_size = ARRAY_SIZE(hclge_igu_egu_reg_info) 1492 }, { 1493 .cmd = HCLGE_OPC_DFX_GEN_REG, 1494 .result_regs = hclge_gen_reg_info_tnl, 1495 .bd_num = HCLGE_BD_NUM_GEN_REG, 1496 .result_regs_size = ARRAY_SIZE(hclge_gen_reg_info_tnl), 1497 .need_para = true 1498 }, { 1499 .cmd = HCLGE_OPC_DFX_GEN_REG, 1500 .result_regs = hclge_gen_reg_info, 1501 .bd_num = HCLGE_BD_NUM_GEN_REG, 1502 .result_regs_size = ARRAY_SIZE(hclge_gen_reg_info) 1503 } 1504 }; 1505 1506 static int 1507 hclge_print_mod_reg_info(struct device *dev, struct hclge_desc *desc, 1508 const struct hclge_mod_reg_info *reg_info, int size) 1509 { 1510 int i, j, pos, actual_len; 1511 u8 offset, bd_idx, index; 1512 char *buf; 1513 1514 buf = kzalloc(HCLGE_MOD_REG_INFO_LEN_MAX, GFP_KERNEL); 1515 if (!buf) 1516 return -ENOMEM; 1517 1518 for (i = 0; i < size; i++) { 1519 actual_len = strlen(reg_info[i].reg_name) + 1520 HCLGE_MOD_REG_EXTRA_LEN + 1521 HCLGE_MOD_REG_VALUE_LEN * reg_info[i].group_size; 1522 if (actual_len > HCLGE_MOD_REG_INFO_LEN_MAX) { 1523 dev_info(dev, "length of reg(%s) is invalid, len=%d\n", 1524 reg_info[i].reg_name, actual_len); 1525 continue; 1526 } 1527 1528 pos = scnprintf(buf, HCLGE_MOD_REG_INFO_LEN_MAX, "%s", 1529 reg_info[i].reg_name); 1530 if (reg_info[i].has_suffix) 1531 pos += scnprintf(buf + pos, 1532 HCLGE_MOD_REG_INFO_LEN_MAX - pos, "%u", 1533 le32_to_cpu(desc->data[0])); 1534 pos += scnprintf(buf + pos, 1535 HCLGE_MOD_REG_INFO_LEN_MAX - pos, 1536 ":"); 1537 for (j = 0; j < reg_info[i].group_size; j++) { 1538 offset = reg_info[i].reg_offset_group[j]; 1539 index = offset % HCLGE_DESC_DATA_LEN; 1540 bd_idx = offset / HCLGE_DESC_DATA_LEN; 1541 pos += scnprintf(buf + pos, 1542 HCLGE_MOD_REG_INFO_LEN_MAX - pos, 1543 " %08x", 1544 le32_to_cpu(desc[bd_idx].data[index])); 1545 } 1546 dev_info(dev, "%s\n", buf); 1547 } 1548 1549 kfree(buf); 1550 return 0; 1551 } 1552 1553 static bool hclge_err_mod_check_support_cmd(enum hclge_opcode_type opcode, 1554 struct hclge_dev *hdev) 1555 { 1556 if (opcode == HCLGE_OPC_DFX_GEN_REG && 1557 !hnae3_ae_dev_gen_reg_dfx_supported(hdev)) 1558 return false; 1559 return true; 1560 } 1561 1562 /* For each common msg, send cmdq to IMP and print result reg info. 1563 * If there is a parameter, loop it and request. 1564 */ 1565 static void 1566 hclge_query_reg_info(struct hclge_dev *hdev, 1567 struct hclge_mod_reg_common_msg *msg, u32 loop_time, 1568 u32 *loop_para) 1569 { 1570 int desc_len, i, ret; 1571 1572 desc_len = msg->bd_num * sizeof(struct hclge_desc); 1573 msg->desc = kzalloc(desc_len, GFP_KERNEL); 1574 if (!msg->desc) { 1575 dev_err(&hdev->pdev->dev, "failed to query reg info, ret=%d", 1576 -ENOMEM); 1577 return; 1578 } 1579 1580 for (i = 0; i < loop_time; i++) { 1581 ret = hclge_dbg_cmd_send(hdev, msg->desc, *loop_para, 1582 msg->bd_num, msg->cmd); 1583 loop_para++; 1584 if (ret) 1585 continue; 1586 ret = hclge_print_mod_reg_info(&hdev->pdev->dev, msg->desc, 1587 msg->result_regs, 1588 msg->result_regs_size); 1589 if (ret) 1590 dev_err(&hdev->pdev->dev, "failed to print mod reg info, ret=%d\n", 1591 ret); 1592 } 1593 1594 kfree(msg->desc); 1595 } 1596 1597 static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) 1598 { 1599 u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0}; 1600 struct hclge_mod_reg_common_msg msg; 1601 u8 i, j, num, loop_time; 1602 1603 num = ARRAY_SIZE(hclge_ssu_reg_common_msg); 1604 for (i = 0; i < num; i++) { 1605 msg = hclge_ssu_reg_common_msg[i]; 1606 if (!hclge_err_mod_check_support_cmd(msg.cmd, hdev)) 1607 continue; 1608 loop_time = 1; 1609 loop_para[0] = 0; 1610 if (msg.need_para) { 1611 loop_time = min(hdev->ae_dev->dev_specs.tnl_num, 1612 HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE); 1613 for (j = 0; j < loop_time; j++) 1614 loop_para[j] = j + 1; 1615 } 1616 hclge_query_reg_info(hdev, &msg, loop_time, loop_para); 1617 } 1618 } 1619 1620 static const struct hclge_hw_module_id hclge_hw_module_id_st[] = { 1621 { 1622 .module_id = MODULE_NONE, 1623 .msg = "MODULE_NONE" 1624 }, { 1625 .module_id = MODULE_BIOS_COMMON, 1626 .msg = "MODULE_BIOS_COMMON" 1627 }, { 1628 .module_id = MODULE_GE, 1629 .msg = "MODULE_GE" 1630 }, { 1631 .module_id = MODULE_IGU_EGU, 1632 .msg = "MODULE_IGU_EGU", 1633 .query_reg_info = hclge_query_reg_info_of_ssu 1634 }, { 1635 .module_id = MODULE_LGE, 1636 .msg = "MODULE_LGE" 1637 }, { 1638 .module_id = MODULE_NCSI, 1639 .msg = "MODULE_NCSI" 1640 }, { 1641 .module_id = MODULE_PPP, 1642 .msg = "MODULE_PPP" 1643 }, { 1644 .module_id = MODULE_QCN, 1645 .msg = "MODULE_QCN" 1646 }, { 1647 .module_id = MODULE_RCB_RX, 1648 .msg = "MODULE_RCB_RX" 1649 }, { 1650 .module_id = MODULE_RTC, 1651 .msg = "MODULE_RTC" 1652 }, { 1653 .module_id = MODULE_SSU, 1654 .msg = "MODULE_SSU", 1655 .query_reg_info = hclge_query_reg_info_of_ssu 1656 }, { 1657 .module_id = MODULE_TM, 1658 .msg = "MODULE_TM" 1659 }, { 1660 .module_id = MODULE_RCB_TX, 1661 .msg = "MODULE_RCB_TX" 1662 }, { 1663 .module_id = MODULE_TXDMA, 1664 .msg = "MODULE_TXDMA" 1665 }, { 1666 .module_id = MODULE_MASTER, 1667 .msg = "MODULE_MASTER" 1668 }, { 1669 .module_id = MODULE_HIMAC, 1670 .msg = "MODULE_HIMAC" 1671 }, { 1672 .module_id = MODULE_ROCEE_TOP, 1673 .msg = "MODULE_ROCEE_TOP" 1674 }, { 1675 .module_id = MODULE_ROCEE_TIMER, 1676 .msg = "MODULE_ROCEE_TIMER" 1677 }, { 1678 .module_id = MODULE_ROCEE_MDB, 1679 .msg = "MODULE_ROCEE_MDB" 1680 }, { 1681 .module_id = MODULE_ROCEE_TSP, 1682 .msg = "MODULE_ROCEE_TSP" 1683 }, { 1684 .module_id = MODULE_ROCEE_TRP, 1685 .msg = "MODULE_ROCEE_TRP" 1686 }, { 1687 .module_id = MODULE_ROCEE_SCC, 1688 .msg = "MODULE_ROCEE_SCC" 1689 }, { 1690 .module_id = MODULE_ROCEE_CAEP, 1691 .msg = "MODULE_ROCEE_CAEP" 1692 }, { 1693 .module_id = MODULE_ROCEE_GEN_AC, 1694 .msg = "MODULE_ROCEE_GEN_AC" 1695 }, { 1696 .module_id = MODULE_ROCEE_QMM, 1697 .msg = "MODULE_ROCEE_QMM" 1698 }, { 1699 .module_id = MODULE_ROCEE_LSAN, 1700 .msg = "MODULE_ROCEE_LSAN" 1701 } 1702 }; 1703 1704 static const struct hclge_hw_type_id hclge_hw_type_id_st[] = { 1705 { 1706 .type_id = NONE_ERROR, 1707 .msg = "none_error" 1708 }, { 1709 .type_id = FIFO_ERROR, 1710 .msg = "fifo_error" 1711 }, { 1712 .type_id = MEMORY_ERROR, 1713 .msg = "memory_error" 1714 }, { 1715 .type_id = POISON_ERROR, 1716 .msg = "poison_error" 1717 }, { 1718 .type_id = MSIX_ECC_ERROR, 1719 .msg = "msix_ecc_error" 1720 }, { 1721 .type_id = TQP_INT_ECC_ERROR, 1722 .msg = "tqp_int_ecc_error" 1723 }, { 1724 .type_id = PF_ABNORMAL_INT_ERROR, 1725 .msg = "pf_abnormal_int_error", 1726 .cause_by_vf = true 1727 }, { 1728 .type_id = MPF_ABNORMAL_INT_ERROR, 1729 .msg = "mpf_abnormal_int_error", 1730 .cause_by_vf = true 1731 }, { 1732 .type_id = COMMON_ERROR, 1733 .msg = "common_error" 1734 }, { 1735 .type_id = PORT_ERROR, 1736 .msg = "port_error" 1737 }, { 1738 .type_id = ETS_ERROR, 1739 .msg = "ets_error" 1740 }, { 1741 .type_id = NCSI_ERROR, 1742 .msg = "ncsi_error" 1743 }, { 1744 .type_id = GLB_ERROR, 1745 .msg = "glb_error" 1746 }, { 1747 .type_id = LINK_ERROR, 1748 .msg = "link_error" 1749 }, { 1750 .type_id = PTP_ERROR, 1751 .msg = "ptp_error" 1752 }, { 1753 .type_id = ROCEE_NORMAL_ERR, 1754 .msg = "rocee_normal_error" 1755 }, { 1756 .type_id = ROCEE_OVF_ERR, 1757 .msg = "rocee_ovf_error" 1758 }, { 1759 .type_id = ROCEE_BUS_ERR, 1760 .msg = "rocee_bus_error" 1761 }, 1762 }; 1763 1764 static void hclge_log_error(struct device *dev, char *reg, 1765 const struct hclge_hw_error *err, 1766 u32 err_sts, unsigned long *reset_requests) 1767 { 1768 while (err->msg) { 1769 if (err->int_msk & err_sts) { 1770 dev_err(dev, "%s %s found [error status=0x%x]\n", 1771 reg, err->msg, err_sts); 1772 if (err->reset_level && 1773 err->reset_level != HNAE3_NONE_RESET) 1774 set_bit(err->reset_level, reset_requests); 1775 } 1776 err++; 1777 } 1778 } 1779 1780 /* hclge_cmd_query_error: read the error information 1781 * @hdev: pointer to struct hclge_dev 1782 * @desc: descriptor for describing the command 1783 * @cmd: command opcode 1784 * @flag: flag for extended command structure 1785 * 1786 * This function query the error info from hw register/s using command 1787 */ 1788 static int hclge_cmd_query_error(struct hclge_dev *hdev, 1789 struct hclge_desc *desc, u32 cmd, u16 flag) 1790 { 1791 struct device *dev = &hdev->pdev->dev; 1792 int desc_num = 1; 1793 int ret; 1794 1795 hclge_cmd_setup_basic_desc(&desc[0], cmd, true); 1796 if (flag) { 1797 desc[0].flag |= cpu_to_le16(flag); 1798 hclge_cmd_setup_basic_desc(&desc[1], cmd, true); 1799 desc_num = 2; 1800 } 1801 1802 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 1803 if (ret) 1804 dev_err(dev, "query error cmd failed (%d)\n", ret); 1805 1806 return ret; 1807 } 1808 1809 static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev) 1810 { 1811 struct hclge_desc desc; 1812 1813 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false); 1814 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR); 1815 1816 return hclge_cmd_send(&hdev->hw, &desc, 1); 1817 } 1818 1819 static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en) 1820 { 1821 struct device *dev = &hdev->pdev->dev; 1822 struct hclge_desc desc[2]; 1823 int ret; 1824 1825 /* configure common error interrupts */ 1826 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); 1827 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1828 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); 1829 1830 if (en) { 1831 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); 1832 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | 1833 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); 1834 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); 1835 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN | 1836 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN); 1837 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); 1838 } 1839 1840 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); 1841 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | 1842 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); 1843 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); 1844 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK | 1845 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK); 1846 desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK); 1847 1848 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1849 if (ret) 1850 dev_err(dev, 1851 "fail(%d) to configure common err interrupts\n", ret); 1852 1853 return ret; 1854 } 1855 1856 static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en) 1857 { 1858 struct device *dev = &hdev->pdev->dev; 1859 struct hclge_desc desc; 1860 int ret; 1861 1862 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2) 1863 return 0; 1864 1865 /* configure NCSI error interrupts */ 1866 hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false); 1867 if (en) 1868 desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN); 1869 1870 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1871 if (ret) 1872 dev_err(dev, 1873 "fail(%d) to configure NCSI error interrupts\n", ret); 1874 1875 return ret; 1876 } 1877 1878 static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en) 1879 { 1880 struct device *dev = &hdev->pdev->dev; 1881 struct hclge_desc desc; 1882 int ret; 1883 1884 /* configure IGU,EGU error interrupts */ 1885 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false); 1886 desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE); 1887 if (en) 1888 desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN); 1889 1890 desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK); 1891 1892 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1893 if (ret) { 1894 dev_err(dev, 1895 "fail(%d) to configure IGU common interrupts\n", ret); 1896 return ret; 1897 } 1898 1899 hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false); 1900 if (en) 1901 desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN); 1902 1903 desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK); 1904 1905 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1906 if (ret) { 1907 dev_err(dev, 1908 "fail(%d) to configure IGU-EGU TNL interrupts\n", ret); 1909 return ret; 1910 } 1911 1912 ret = hclge_config_ncsi_hw_err_int(hdev, en); 1913 1914 return ret; 1915 } 1916 1917 static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd, 1918 bool en) 1919 { 1920 struct device *dev = &hdev->pdev->dev; 1921 struct hclge_desc desc[2]; 1922 int ret; 1923 1924 /* configure PPP error interrupts */ 1925 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 1926 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 1927 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 1928 1929 if (cmd == HCLGE_PPP_CMD0_INT_CMD) { 1930 if (en) { 1931 desc[0].data[0] = 1932 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN); 1933 desc[0].data[1] = 1934 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN); 1935 desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN); 1936 } 1937 1938 desc[1].data[0] = 1939 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK); 1940 desc[1].data[1] = 1941 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK); 1942 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 1943 desc[1].data[2] = 1944 cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK); 1945 } else if (cmd == HCLGE_PPP_CMD1_INT_CMD) { 1946 if (en) { 1947 desc[0].data[0] = 1948 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN); 1949 desc[0].data[1] = 1950 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN); 1951 } 1952 1953 desc[1].data[0] = 1954 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK); 1955 desc[1].data[1] = 1956 cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK); 1957 } 1958 1959 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 1960 if (ret) 1961 dev_err(dev, "fail(%d) to configure PPP error intr\n", ret); 1962 1963 return ret; 1964 } 1965 1966 static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en) 1967 { 1968 int ret; 1969 1970 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD, 1971 en); 1972 if (ret) 1973 return ret; 1974 1975 ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD, 1976 en); 1977 1978 return ret; 1979 } 1980 1981 static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en) 1982 { 1983 struct device *dev = &hdev->pdev->dev; 1984 struct hclge_desc desc; 1985 int ret; 1986 1987 /* configure TM SCH hw errors */ 1988 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false); 1989 if (en) 1990 desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN); 1991 1992 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 1993 if (ret) { 1994 dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret); 1995 return ret; 1996 } 1997 1998 /* configure TM QCN hw errors */ 1999 hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_QCN_MEM_INT_CFG, false); 2000 desc.data[0] = cpu_to_le32(HCLGE_TM_QCN_ERR_INT_TYPE); 2001 if (en) { 2002 desc.data[0] |= cpu_to_le32(HCLGE_TM_QCN_FIFO_INT_EN); 2003 desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN); 2004 } 2005 2006 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2007 if (ret) 2008 dev_err(dev, 2009 "fail(%d) to configure TM QCN mem errors\n", ret); 2010 2011 return ret; 2012 } 2013 2014 static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en) 2015 { 2016 struct device *dev = &hdev->pdev->dev; 2017 struct hclge_desc desc; 2018 int ret; 2019 2020 /* configure MAC common error interrupts */ 2021 hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false); 2022 if (en) 2023 desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN); 2024 2025 desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK); 2026 2027 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2028 if (ret) 2029 dev_err(dev, 2030 "fail(%d) to configure MAC COMMON error intr\n", ret); 2031 2032 return ret; 2033 } 2034 2035 int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en) 2036 { 2037 struct hclge_desc desc; 2038 2039 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false); 2040 if (en) 2041 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN); 2042 else 2043 desc.data[0] = 0; 2044 2045 desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK); 2046 2047 return hclge_cmd_send(&hdev->hw, &desc, 1); 2048 } 2049 2050 static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, 2051 bool en) 2052 { 2053 struct device *dev = &hdev->pdev->dev; 2054 struct hclge_desc desc[2]; 2055 int desc_num = 1; 2056 int ret; 2057 2058 /* configure PPU error interrupts */ 2059 if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) { 2060 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 2061 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2062 hclge_cmd_setup_basic_desc(&desc[1], cmd, false); 2063 if (en) { 2064 desc[0].data[0] = 2065 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN); 2066 desc[0].data[1] = 2067 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN); 2068 desc[1].data[3] = 2069 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN); 2070 desc[1].data[4] = 2071 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN); 2072 } 2073 2074 desc[1].data[0] = 2075 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK); 2076 desc[1].data[1] = 2077 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK); 2078 desc[1].data[2] = 2079 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK); 2080 desc[1].data[3] |= 2081 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK); 2082 desc_num = 2; 2083 } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) { 2084 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 2085 if (en) 2086 desc[0].data[0] = 2087 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2); 2088 2089 desc[0].data[2] = 2090 cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK); 2091 } else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) { 2092 hclge_cmd_setup_basic_desc(&desc[0], cmd, false); 2093 if (en) 2094 desc[0].data[0] = 2095 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN); 2096 2097 desc[0].data[2] = 2098 cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK); 2099 } else { 2100 dev_err(dev, "Invalid cmd to configure PPU error interrupts\n"); 2101 return -EINVAL; 2102 } 2103 2104 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 2105 2106 return ret; 2107 } 2108 2109 static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en) 2110 { 2111 struct device *dev = &hdev->pdev->dev; 2112 int ret; 2113 2114 ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD, 2115 en); 2116 if (ret) { 2117 dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n", 2118 ret); 2119 return ret; 2120 } 2121 2122 ret = hclge_config_ppu_error_interrupts(hdev, 2123 HCLGE_PPU_MPF_OTHER_INT_CMD, 2124 en); 2125 if (ret) { 2126 dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret); 2127 return ret; 2128 } 2129 2130 ret = hclge_config_ppu_error_interrupts(hdev, 2131 HCLGE_PPU_PF_OTHER_INT_CMD, en); 2132 if (ret) 2133 dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n", 2134 ret); 2135 return ret; 2136 } 2137 2138 static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) 2139 { 2140 struct device *dev = &hdev->pdev->dev; 2141 struct hclge_desc desc[2]; 2142 int ret; 2143 2144 /* configure SSU ecc error interrupts */ 2145 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false); 2146 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2147 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false); 2148 if (en) { 2149 desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN); 2150 desc[0].data[1] = 2151 cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN); 2152 desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN); 2153 } 2154 2155 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK); 2156 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK); 2157 desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK); 2158 2159 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 2160 if (ret) { 2161 dev_err(dev, 2162 "fail(%d) to configure SSU ECC error interrupt\n", ret); 2163 return ret; 2164 } 2165 2166 /* configure SSU common error interrupts */ 2167 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false); 2168 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2169 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false); 2170 2171 if (en) { 2172 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 2173 desc[0].data[0] = 2174 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN); 2175 else 2176 desc[0].data[0] = 2177 cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5)); 2178 desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN); 2179 desc[0].data[2] = 2180 cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN); 2181 } 2182 2183 desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK | 2184 HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK); 2185 desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK); 2186 2187 ret = hclge_cmd_send(&hdev->hw, &desc[0], 2); 2188 if (ret) 2189 dev_err(dev, 2190 "fail(%d) to configure SSU COMMON error intr\n", ret); 2191 2192 return ret; 2193 } 2194 2195 /* hclge_query_bd_num: query number of buffer descriptors 2196 * @hdev: pointer to struct hclge_dev 2197 * @is_ras: true for ras, false for msix 2198 * @mpf_bd_num: number of main PF interrupt buffer descriptors 2199 * @pf_bd_num: number of not main PF interrupt buffer descriptors 2200 * 2201 * This function querys number of mpf and pf buffer descriptors. 2202 */ 2203 static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, 2204 u32 *mpf_bd_num, u32 *pf_bd_num) 2205 { 2206 struct device *dev = &hdev->pdev->dev; 2207 u32 mpf_min_bd_num, pf_min_bd_num; 2208 enum hclge_opcode_type opcode; 2209 struct hclge_desc desc_bd; 2210 int ret; 2211 2212 if (is_ras) { 2213 opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM; 2214 mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM; 2215 pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM; 2216 } else { 2217 opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM; 2218 mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM; 2219 pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM; 2220 } 2221 2222 hclge_cmd_setup_basic_desc(&desc_bd, opcode, true); 2223 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 2224 if (ret) { 2225 dev_err(dev, "fail(%d) to query msix int status bd num\n", 2226 ret); 2227 return ret; 2228 } 2229 2230 *mpf_bd_num = le32_to_cpu(desc_bd.data[0]); 2231 *pf_bd_num = le32_to_cpu(desc_bd.data[1]); 2232 if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) { 2233 dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n", 2234 *mpf_bd_num, *pf_bd_num); 2235 return -EINVAL; 2236 } 2237 2238 return 0; 2239 } 2240 2241 /* hclge_handle_mpf_ras_error: handle all main PF RAS errors 2242 * @hdev: pointer to struct hclge_dev 2243 * @desc: descriptor for describing the command 2244 * @num: number of extended command structures 2245 * 2246 * This function handles all the main PF RAS errors in the 2247 * hw register/s using command. 2248 */ 2249 static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, 2250 struct hclge_desc *desc, 2251 int num) 2252 { 2253 struct hnae3_ae_dev *ae_dev = hdev->ae_dev; 2254 struct device *dev = &hdev->pdev->dev; 2255 __le32 *desc_data; 2256 u32 status; 2257 int ret; 2258 2259 /* query all main PF RAS errors */ 2260 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT, 2261 true); 2262 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2263 if (ret) { 2264 dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret); 2265 return ret; 2266 } 2267 2268 /* log HNS common errors */ 2269 status = le32_to_cpu(desc[0].data[0]); 2270 if (status) 2271 hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", 2272 &hclge_imp_tcm_ecc_int[0], status, 2273 &ae_dev->hw_err_reset_req); 2274 2275 status = le32_to_cpu(desc[0].data[1]); 2276 if (status) 2277 hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", 2278 &hclge_cmdq_nic_mem_ecc_int[0], status, 2279 &ae_dev->hw_err_reset_req); 2280 2281 if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) 2282 dev_warn(dev, "imp_rd_data_poison_err found\n"); 2283 2284 status = le32_to_cpu(desc[0].data[3]); 2285 if (status) 2286 hclge_log_error(dev, "TQP_INT_ECC_INT_STS", 2287 &hclge_tqp_int_ecc_int[0], status, 2288 &ae_dev->hw_err_reset_req); 2289 2290 status = le32_to_cpu(desc[0].data[4]); 2291 if (status) 2292 hclge_log_error(dev, "MSIX_ECC_INT_STS", 2293 &hclge_msix_sram_ecc_int[0], status, 2294 &ae_dev->hw_err_reset_req); 2295 2296 /* log SSU(Storage Switch Unit) errors */ 2297 desc_data = (__le32 *)&desc[2]; 2298 status = le32_to_cpu(*(desc_data + 2)); 2299 if (status) 2300 hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", 2301 &hclge_ssu_mem_ecc_err_int[0], status, 2302 &ae_dev->hw_err_reset_req); 2303 2304 status = le32_to_cpu(*(desc_data + 3)) & BIT(0); 2305 if (status) { 2306 dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n", 2307 status); 2308 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); 2309 } 2310 2311 status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK; 2312 if (status) 2313 hclge_log_error(dev, "SSU_COMMON_ERR_INT", 2314 &hclge_ssu_com_err_int[0], status, 2315 &ae_dev->hw_err_reset_req); 2316 2317 /* log IGU(Ingress Unit) errors */ 2318 desc_data = (__le32 *)&desc[3]; 2319 status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK; 2320 if (status) 2321 hclge_log_error(dev, "IGU_INT_STS", 2322 &hclge_igu_int[0], status, 2323 &ae_dev->hw_err_reset_req); 2324 2325 /* log PPP(Programmable Packet Process) errors */ 2326 desc_data = (__le32 *)&desc[4]; 2327 status = le32_to_cpu(*(desc_data + 1)); 2328 if (status) 2329 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", 2330 &hclge_ppp_mpf_abnormal_int_st1[0], status, 2331 &ae_dev->hw_err_reset_req); 2332 2333 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK; 2334 if (status) 2335 hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", 2336 &hclge_ppp_mpf_abnormal_int_st3[0], status, 2337 &ae_dev->hw_err_reset_req); 2338 2339 /* log PPU(RCB) errors */ 2340 desc_data = (__le32 *)&desc[5]; 2341 status = le32_to_cpu(*(desc_data + 1)); 2342 if (status) { 2343 dev_err(dev, 2344 "PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n"); 2345 set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); 2346 } 2347 2348 status = le32_to_cpu(*(desc_data + 2)); 2349 if (status) 2350 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", 2351 &hclge_ppu_mpf_abnormal_int_st2[0], status, 2352 &ae_dev->hw_err_reset_req); 2353 2354 status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK; 2355 if (status) 2356 hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", 2357 &hclge_ppu_mpf_abnormal_int_st3[0], status, 2358 &ae_dev->hw_err_reset_req); 2359 2360 /* log TM(Traffic Manager) errors */ 2361 desc_data = (__le32 *)&desc[6]; 2362 status = le32_to_cpu(*desc_data); 2363 if (status) 2364 hclge_log_error(dev, "TM_SCH_RINT", 2365 &hclge_tm_sch_rint[0], status, 2366 &ae_dev->hw_err_reset_req); 2367 2368 /* log QCN(Quantized Congestion Control) errors */ 2369 desc_data = (__le32 *)&desc[7]; 2370 status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK; 2371 if (status) 2372 hclge_log_error(dev, "QCN_FIFO_RINT", 2373 &hclge_qcn_fifo_rint[0], status, 2374 &ae_dev->hw_err_reset_req); 2375 2376 status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK; 2377 if (status) 2378 hclge_log_error(dev, "QCN_ECC_RINT", 2379 &hclge_qcn_ecc_rint[0], status, 2380 &ae_dev->hw_err_reset_req); 2381 2382 /* log NCSI errors */ 2383 desc_data = (__le32 *)&desc[9]; 2384 status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK; 2385 if (status) 2386 hclge_log_error(dev, "NCSI_ECC_INT_RPT", 2387 &hclge_ncsi_err_int[0], status, 2388 &ae_dev->hw_err_reset_req); 2389 2390 /* clear all main PF RAS errors */ 2391 hclge_comm_cmd_reuse_desc(&desc[0], false); 2392 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2393 if (ret) 2394 dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret); 2395 2396 return ret; 2397 } 2398 2399 /* hclge_handle_pf_ras_error: handle all PF RAS errors 2400 * @hdev: pointer to struct hclge_dev 2401 * @desc: descriptor for describing the command 2402 * @num: number of extended command structures 2403 * 2404 * This function handles all the PF RAS errors in the 2405 * hw registers using command. 2406 */ 2407 static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, 2408 struct hclge_desc *desc, 2409 int num) 2410 { 2411 struct hnae3_ae_dev *ae_dev = hdev->ae_dev; 2412 struct device *dev = &hdev->pdev->dev; 2413 __le32 *desc_data; 2414 u32 status; 2415 int ret; 2416 2417 /* query all PF RAS errors */ 2418 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT, 2419 true); 2420 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2421 if (ret) { 2422 dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret); 2423 return ret; 2424 } 2425 2426 /* log SSU(Storage Switch Unit) errors */ 2427 status = le32_to_cpu(desc[0].data[0]); 2428 if (status) 2429 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 2430 &hclge_ssu_port_based_err_int[0], status, 2431 &ae_dev->hw_err_reset_req); 2432 2433 status = le32_to_cpu(desc[0].data[1]); 2434 if (status) 2435 hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", 2436 &hclge_ssu_fifo_overflow_int[0], status, 2437 &ae_dev->hw_err_reset_req); 2438 2439 status = le32_to_cpu(desc[0].data[2]); 2440 if (status) 2441 hclge_log_error(dev, "SSU_ETS_TCG_INT", 2442 &hclge_ssu_ets_tcg_int[0], status, 2443 &ae_dev->hw_err_reset_req); 2444 2445 /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ 2446 desc_data = (__le32 *)&desc[1]; 2447 status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; 2448 if (status) 2449 hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", 2450 &hclge_igu_egu_tnl_int[0], status, 2451 &ae_dev->hw_err_reset_req); 2452 2453 /* log PPU(RCB) errors */ 2454 desc_data = (__le32 *)&desc[3]; 2455 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; 2456 if (status) { 2457 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", 2458 &hclge_ppu_pf_abnormal_int[0], status, 2459 &ae_dev->hw_err_reset_req); 2460 hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR); 2461 } 2462 2463 /* clear all PF RAS errors */ 2464 hclge_comm_cmd_reuse_desc(&desc[0], false); 2465 ret = hclge_cmd_send(&hdev->hw, &desc[0], num); 2466 if (ret) 2467 dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret); 2468 2469 return ret; 2470 } 2471 2472 static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) 2473 { 2474 u32 mpf_bd_num, pf_bd_num, bd_num; 2475 struct hclge_desc *desc; 2476 int ret; 2477 2478 /* query the number of registers in the RAS int status */ 2479 ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num); 2480 if (ret) 2481 return ret; 2482 2483 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 2484 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 2485 if (!desc) 2486 return -ENOMEM; 2487 2488 /* handle all main PF RAS errors */ 2489 ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num); 2490 if (ret) { 2491 kfree(desc); 2492 return ret; 2493 } 2494 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 2495 2496 /* handle all PF RAS errors */ 2497 ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num); 2498 kfree(desc); 2499 2500 return ret; 2501 } 2502 2503 static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) 2504 { 2505 struct device *dev = &hdev->pdev->dev; 2506 struct hclge_desc desc[3]; 2507 int ret; 2508 2509 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2510 true); 2511 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2512 true); 2513 hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, 2514 true); 2515 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2516 desc[1].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 2517 2518 ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); 2519 if (ret) { 2520 dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); 2521 return ret; 2522 } 2523 2524 dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", 2525 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2526 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2527 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2528 dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", 2529 le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), 2530 le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), 2531 le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); 2532 dev_err(dev, "AXI3: %08X %08X %08X %08X\n", 2533 le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]), 2534 le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); 2535 2536 return 0; 2537 } 2538 2539 static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) 2540 { 2541 struct device *dev = &hdev->pdev->dev; 2542 struct hclge_desc desc[2]; 2543 int ret; 2544 2545 ret = hclge_cmd_query_error(hdev, &desc[0], 2546 HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, 2547 HCLGE_COMM_CMD_FLAG_NEXT); 2548 if (ret) { 2549 dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); 2550 return ret; 2551 } 2552 2553 dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", 2554 le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), 2555 le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), 2556 le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); 2557 dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), 2558 le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); 2559 2560 return 0; 2561 } 2562 2563 static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) 2564 { 2565 struct device *dev = &hdev->pdev->dev; 2566 struct hclge_desc desc[2]; 2567 int ret; 2568 2569 /* read overflow error status */ 2570 ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD, 2571 0); 2572 if (ret) { 2573 dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); 2574 return ret; 2575 } 2576 2577 /* log overflow error */ 2578 if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2579 const struct hclge_hw_error *err; 2580 u32 err_sts; 2581 2582 err = &hclge_rocee_qmm_ovf_err_int[0]; 2583 err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK & 2584 le32_to_cpu(desc[0].data[0]); 2585 while (err->msg) { 2586 if (err->int_msk == err_sts) { 2587 dev_err(dev, "%s [error status=0x%x] found\n", 2588 err->msg, 2589 le32_to_cpu(desc[0].data[0])); 2590 break; 2591 } 2592 err++; 2593 } 2594 } 2595 2596 if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2597 dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n", 2598 le32_to_cpu(desc[0].data[1])); 2599 } 2600 2601 if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) { 2602 dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n", 2603 le32_to_cpu(desc[0].data[2])); 2604 } 2605 2606 return 0; 2607 } 2608 2609 static enum hnae3_reset_type 2610 hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) 2611 { 2612 enum hnae3_reset_type reset_type = HNAE3_NONE_RESET; 2613 struct device *dev = &hdev->pdev->dev; 2614 struct hclge_desc desc[2]; 2615 unsigned int status; 2616 int ret; 2617 2618 /* read RAS error interrupt status */ 2619 ret = hclge_cmd_query_error(hdev, &desc[0], 2620 HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0); 2621 if (ret) { 2622 dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret); 2623 /* reset everything for now */ 2624 return HNAE3_GLOBAL_RESET; 2625 } 2626 2627 status = le32_to_cpu(desc[0].data[0]); 2628 if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { 2629 if (status & HCLGE_ROCEE_RERR_INT_MASK) 2630 dev_err(dev, "ROCEE RAS AXI rresp error\n"); 2631 2632 if (status & HCLGE_ROCEE_BERR_INT_MASK) 2633 dev_err(dev, "ROCEE RAS AXI bresp error\n"); 2634 2635 reset_type = HNAE3_FUNC_RESET; 2636 2637 hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR); 2638 2639 ret = hclge_log_rocee_axi_error(hdev); 2640 if (ret) 2641 return HNAE3_GLOBAL_RESET; 2642 } 2643 2644 if (status & HCLGE_ROCEE_ECC_INT_MASK) { 2645 dev_err(dev, "ROCEE RAS 2bit ECC error\n"); 2646 reset_type = HNAE3_GLOBAL_RESET; 2647 2648 ret = hclge_log_rocee_ecc_error(hdev); 2649 if (ret) 2650 return HNAE3_GLOBAL_RESET; 2651 } 2652 2653 if (status & HCLGE_ROCEE_OVF_INT_MASK) { 2654 ret = hclge_log_rocee_ovf_error(hdev); 2655 if (ret) { 2656 dev_err(dev, "failed(%d) to process ovf error\n", ret); 2657 /* reset everything for now */ 2658 return HNAE3_GLOBAL_RESET; 2659 } 2660 } 2661 2662 /* clear error status */ 2663 hclge_comm_cmd_reuse_desc(&desc[0], false); 2664 ret = hclge_cmd_send(&hdev->hw, &desc[0], 1); 2665 if (ret) { 2666 dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret); 2667 /* reset everything for now */ 2668 return HNAE3_GLOBAL_RESET; 2669 } 2670 2671 return reset_type; 2672 } 2673 2674 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) 2675 { 2676 struct device *dev = &hdev->pdev->dev; 2677 struct hclge_desc desc; 2678 int ret; 2679 2680 if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 || 2681 !hnae3_dev_roce_supported(hdev)) 2682 return 0; 2683 2684 hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false); 2685 if (en) { 2686 /* enable ROCEE hw error interrupts */ 2687 desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN); 2688 desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN); 2689 2690 hclge_log_and_clear_rocee_ras_error(hdev); 2691 } 2692 desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK); 2693 desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK); 2694 2695 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2696 if (ret) 2697 dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret); 2698 2699 return ret; 2700 } 2701 2702 static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) 2703 { 2704 struct hclge_dev *hdev = ae_dev->priv; 2705 enum hnae3_reset_type reset_type; 2706 2707 if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) 2708 return; 2709 2710 reset_type = hclge_log_and_clear_rocee_ras_error(hdev); 2711 if (reset_type != HNAE3_NONE_RESET) 2712 set_bit(reset_type, &ae_dev->hw_err_reset_req); 2713 } 2714 2715 static const struct hclge_hw_blk hw_blk[] = { 2716 { 2717 .msk = BIT(0), 2718 .name = "IGU_EGU", 2719 .config_err_int = hclge_config_igu_egu_hw_err_int, 2720 }, { 2721 .msk = BIT(1), 2722 .name = "PPP", 2723 .config_err_int = hclge_config_ppp_hw_err_int, 2724 }, { 2725 .msk = BIT(2), 2726 .name = "SSU", 2727 .config_err_int = hclge_config_ssu_hw_err_int, 2728 }, { 2729 .msk = BIT(3), 2730 .name = "PPU", 2731 .config_err_int = hclge_config_ppu_hw_err_int, 2732 }, { 2733 .msk = BIT(4), 2734 .name = "TM", 2735 .config_err_int = hclge_config_tm_hw_err_int, 2736 }, { 2737 .msk = BIT(5), 2738 .name = "COMMON", 2739 .config_err_int = hclge_config_common_hw_err_int, 2740 }, { 2741 .msk = BIT(8), 2742 .name = "MAC", 2743 .config_err_int = hclge_config_mac_err_int, 2744 }, { 2745 /* sentinel */ 2746 } 2747 }; 2748 2749 static void hclge_config_all_msix_error(struct hclge_dev *hdev, bool enable) 2750 { 2751 u32 reg_val; 2752 2753 reg_val = hclge_read_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG); 2754 2755 if (enable) 2756 reg_val |= BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2757 else 2758 reg_val &= ~BIT(HCLGE_VECTOR0_ALL_MSIX_ERR_B); 2759 2760 hclge_write_dev(&hdev->hw, HCLGE_PF_OTHER_INT_REG, reg_val); 2761 } 2762 2763 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) 2764 { 2765 const struct hclge_hw_blk *module = hw_blk; 2766 int ret = 0; 2767 2768 hclge_config_all_msix_error(hdev, state); 2769 2770 while (module->name) { 2771 if (module->config_err_int) { 2772 ret = module->config_err_int(hdev, state); 2773 if (ret) 2774 return ret; 2775 } 2776 module++; 2777 } 2778 2779 return ret; 2780 } 2781 2782 pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) 2783 { 2784 struct hclge_dev *hdev = ae_dev->priv; 2785 struct device *dev = &hdev->pdev->dev; 2786 u32 status; 2787 2788 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 2789 dev_err(dev, 2790 "Can't recover - RAS error reported during dev init\n"); 2791 return PCI_ERS_RESULT_NONE; 2792 } 2793 2794 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 2795 if (status & HCLGE_RAS_REG_NFE_MASK || 2796 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) 2797 ae_dev->hw_err_reset_req = 0; 2798 else 2799 goto out; 2800 2801 /* Handling Non-fatal HNS RAS errors */ 2802 if (status & HCLGE_RAS_REG_NFE_MASK) { 2803 dev_err(dev, 2804 "HNS Non-Fatal RAS error(status=0x%x) identified\n", 2805 status); 2806 hclge_handle_all_ras_errors(hdev); 2807 } 2808 2809 /* Handling Non-fatal Rocee RAS errors */ 2810 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 && 2811 status & HCLGE_RAS_REG_ROCEE_ERR_MASK) { 2812 dev_err(dev, "ROCEE Non-Fatal RAS error identified\n"); 2813 hclge_handle_rocee_ras_error(ae_dev); 2814 } 2815 2816 if (ae_dev->hw_err_reset_req) 2817 return PCI_ERS_RESULT_NEED_RESET; 2818 2819 out: 2820 return PCI_ERS_RESULT_RECOVERED; 2821 } 2822 2823 static int hclge_clear_hw_msix_error(struct hclge_dev *hdev, 2824 struct hclge_desc *desc, bool is_mpf, 2825 u32 bd_num) 2826 { 2827 if (is_mpf) 2828 desc[0].opcode = 2829 cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT); 2830 else 2831 desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT); 2832 2833 desc[0].flag = cpu_to_le16(HCLGE_COMM_CMD_FLAG_NO_INTR | 2834 HCLGE_COMM_CMD_FLAG_IN); 2835 2836 return hclge_cmd_send(&hdev->hw, &desc[0], bd_num); 2837 } 2838 2839 /* hclge_query_8bd_info: query information about over_8bd_nfe_err 2840 * @hdev: pointer to struct hclge_dev 2841 * @vf_id: Index of the virtual function with error 2842 * @q_id: Physical index of the queue with error 2843 * 2844 * This function get specific index of queue and function which causes 2845 * over_8bd_nfe_err by using command. If vf_id is 0, it means error is 2846 * caused by PF instead of VF. 2847 */ 2848 static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id, 2849 u16 *q_id) 2850 { 2851 struct hclge_query_ppu_pf_other_int_dfx_cmd *req; 2852 struct hclge_desc desc; 2853 int ret; 2854 2855 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true); 2856 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 2857 if (ret) 2858 return ret; 2859 2860 req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data; 2861 *vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id); 2862 *q_id = le16_to_cpu(req->over_8bd_no_fe_qid); 2863 2864 return 0; 2865 } 2866 2867 /* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err 2868 * @hdev: pointer to struct hclge_dev 2869 * @reset_requests: reset level that we need to trigger later 2870 * 2871 * over_8bd_nfe_err is a special MSI-X because it may caused by a VF, in 2872 * that case, we need to trigger VF reset. Otherwise, a PF reset is needed. 2873 */ 2874 static void hclge_handle_over_8bd_err(struct hclge_dev *hdev, 2875 unsigned long *reset_requests) 2876 { 2877 struct device *dev = &hdev->pdev->dev; 2878 u16 vf_id; 2879 u16 q_id; 2880 int ret; 2881 2882 ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id); 2883 if (ret) { 2884 dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n", 2885 ret); 2886 return; 2887 } 2888 2889 dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vport(%u), queue_id(%u)\n", 2890 vf_id, q_id); 2891 2892 if (vf_id) { 2893 if (vf_id >= hdev->num_alloc_vport) { 2894 dev_err(dev, "invalid vport(%u)\n", vf_id); 2895 return; 2896 } 2897 2898 /* If we need to trigger other reset whose level is higher 2899 * than HNAE3_VF_FUNC_RESET, no need to trigger a VF reset 2900 * here. 2901 */ 2902 if (*reset_requests != 0) 2903 return; 2904 2905 ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]); 2906 if (ret) 2907 dev_err(dev, "inform reset to vport(%u) failed %d!\n", 2908 vf_id, ret); 2909 } else { 2910 set_bit(HNAE3_FUNC_RESET, reset_requests); 2911 } 2912 } 2913 2914 /* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors 2915 * @hdev: pointer to struct hclge_dev 2916 * @desc: descriptor for describing the command 2917 * @mpf_bd_num: number of extended command structures 2918 * @reset_requests: record of the reset level that we need 2919 * 2920 * This function handles all the main PF MSI-X errors in the hw register/s 2921 * using command. 2922 */ 2923 static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev, 2924 struct hclge_desc *desc, 2925 int mpf_bd_num, 2926 unsigned long *reset_requests) 2927 { 2928 struct device *dev = &hdev->pdev->dev; 2929 __le32 *desc_data; 2930 u32 status; 2931 int ret; 2932 /* query all main PF MSIx errors */ 2933 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, 2934 true); 2935 ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num); 2936 if (ret) { 2937 dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret); 2938 return ret; 2939 } 2940 2941 /* log MAC errors */ 2942 desc_data = (__le32 *)&desc[1]; 2943 status = le32_to_cpu(*desc_data); 2944 if (status) 2945 hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R", 2946 &hclge_mac_afifo_tnl_int[0], status, 2947 reset_requests); 2948 2949 /* log PPU(RCB) MPF errors */ 2950 desc_data = (__le32 *)&desc[5]; 2951 status = le32_to_cpu(*(desc_data + 2)) & 2952 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; 2953 if (status) 2954 dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x\n]", 2955 status); 2956 2957 /* clear all main PF MSIx errors */ 2958 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 2959 if (ret) 2960 dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret); 2961 2962 return ret; 2963 } 2964 2965 /* hclge_handle_pf_msix_error: handle all PF MSI-X errors 2966 * @hdev: pointer to struct hclge_dev 2967 * @desc: descriptor for describing the command 2968 * @mpf_bd_num: number of extended command structures 2969 * @reset_requests: record of the reset level that we need 2970 * 2971 * This function handles all the PF MSI-X errors in the hw register/s using 2972 * command. 2973 */ 2974 static int hclge_handle_pf_msix_error(struct hclge_dev *hdev, 2975 struct hclge_desc *desc, 2976 int pf_bd_num, 2977 unsigned long *reset_requests) 2978 { 2979 struct device *dev = &hdev->pdev->dev; 2980 __le32 *desc_data; 2981 u32 status; 2982 int ret; 2983 2984 /* query all PF MSIx errors */ 2985 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, 2986 true); 2987 ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); 2988 if (ret) { 2989 dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret); 2990 return ret; 2991 } 2992 2993 /* log SSU PF errors */ 2994 status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; 2995 if (status) 2996 hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", 2997 &hclge_ssu_port_based_pf_int[0], 2998 status, reset_requests); 2999 3000 /* read and log PPP PF errors */ 3001 desc_data = (__le32 *)&desc[2]; 3002 status = le32_to_cpu(*desc_data); 3003 if (status) 3004 hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", 3005 &hclge_ppp_pf_abnormal_int[0], 3006 status, reset_requests); 3007 3008 /* log PPU(RCB) PF errors */ 3009 desc_data = (__le32 *)&desc[3]; 3010 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; 3011 if (status) 3012 hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", 3013 &hclge_ppu_pf_abnormal_int[0], 3014 status, reset_requests); 3015 3016 status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK; 3017 if (status) 3018 hclge_handle_over_8bd_err(hdev, reset_requests); 3019 3020 /* clear all PF MSIx errors */ 3021 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 3022 if (ret) 3023 dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret); 3024 3025 return ret; 3026 } 3027 3028 static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, 3029 unsigned long *reset_requests) 3030 { 3031 u32 mpf_bd_num, pf_bd_num, bd_num; 3032 struct hclge_desc *desc; 3033 int ret; 3034 3035 /* query the number of bds for the MSIx int status */ 3036 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 3037 if (ret) 3038 goto out; 3039 3040 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 3041 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 3042 if (!desc) 3043 return -ENOMEM; 3044 3045 ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num, 3046 reset_requests); 3047 if (ret) 3048 goto msi_error; 3049 3050 memset(desc, 0, bd_num * sizeof(struct hclge_desc)); 3051 ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests); 3052 if (ret) 3053 goto msi_error; 3054 3055 ret = hclge_handle_mac_tnl(hdev); 3056 3057 msi_error: 3058 kfree(desc); 3059 out: 3060 return ret; 3061 } 3062 3063 int hclge_handle_hw_msix_error(struct hclge_dev *hdev, 3064 unsigned long *reset_requests) 3065 { 3066 struct device *dev = &hdev->pdev->dev; 3067 3068 if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { 3069 dev_err(dev, 3070 "failed to handle msix error during dev init\n"); 3071 return -EAGAIN; 3072 } 3073 3074 return hclge_handle_all_hw_msix_error(hdev, reset_requests); 3075 } 3076 3077 int hclge_handle_mac_tnl(struct hclge_dev *hdev) 3078 { 3079 struct hclge_mac_tnl_stats mac_tnl_stats; 3080 struct device *dev = &hdev->pdev->dev; 3081 struct hclge_desc desc; 3082 u32 status; 3083 int ret; 3084 3085 /* query and clear mac tnl interruptions */ 3086 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_MAC_TNL_INT, true); 3087 ret = hclge_cmd_send(&hdev->hw, &desc, 1); 3088 if (ret) { 3089 dev_err(dev, "failed to query mac tnl int, ret = %d.\n", ret); 3090 return ret; 3091 } 3092 3093 status = le32_to_cpu(desc.data[0]); 3094 if (status) { 3095 /* When mac tnl interrupt occurs, we record current time and 3096 * register status here in a fifo, then clear the status. So 3097 * that if link status changes suddenly at some time, we can 3098 * query them by debugfs. 3099 */ 3100 mac_tnl_stats.time = local_clock(); 3101 mac_tnl_stats.status = status; 3102 kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats); 3103 ret = hclge_clear_mac_tnl_int(hdev); 3104 if (ret) 3105 dev_err(dev, "failed to clear mac tnl int, ret = %d.\n", 3106 ret); 3107 } 3108 3109 return ret; 3110 } 3111 3112 void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) 3113 { 3114 struct hclge_dev *hdev = ae_dev->priv; 3115 struct device *dev = &hdev->pdev->dev; 3116 u32 mpf_bd_num, pf_bd_num, bd_num; 3117 struct hclge_desc *desc; 3118 u32 status; 3119 int ret; 3120 3121 ae_dev->hw_err_reset_req = 0; 3122 status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); 3123 3124 /* query the number of bds for the MSIx int status */ 3125 ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); 3126 if (ret) 3127 return; 3128 3129 bd_num = max_t(u32, mpf_bd_num, pf_bd_num); 3130 desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); 3131 if (!desc) 3132 return; 3133 3134 /* Clear HNS hw errors reported through msix */ 3135 memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) - 3136 HCLGE_DESC_NO_DATA_LEN); 3137 ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); 3138 if (ret) { 3139 dev_err(dev, "fail(%d) to clear mpf msix int during init\n", 3140 ret); 3141 goto msi_error; 3142 } 3143 3144 memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) - 3145 HCLGE_DESC_NO_DATA_LEN); 3146 ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); 3147 if (ret) { 3148 dev_err(dev, "fail(%d) to clear pf msix int during init\n", 3149 ret); 3150 goto msi_error; 3151 } 3152 3153 /* Handle Non-fatal HNS RAS errors */ 3154 if (status & HCLGE_RAS_REG_NFE_MASK) { 3155 dev_err(dev, "HNS hw error(RAS) identified during init\n"); 3156 hclge_handle_all_ras_errors(hdev); 3157 } 3158 3159 msi_error: 3160 kfree(desc); 3161 } 3162 3163 bool hclge_find_error_source(struct hclge_dev *hdev) 3164 { 3165 u32 msix_src_flag, hw_err_src_flag; 3166 3167 msix_src_flag = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS) & 3168 HCLGE_VECTOR0_REG_MSIX_MASK; 3169 3170 hw_err_src_flag = hclge_read_dev(&hdev->hw, 3171 HCLGE_RAS_PF_OTHER_INT_STS_REG) & 3172 HCLGE_RAS_REG_ERR_MASK; 3173 3174 return msix_src_flag || hw_err_src_flag; 3175 } 3176 3177 void hclge_handle_occurred_error(struct hclge_dev *hdev) 3178 { 3179 struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); 3180 3181 if (hclge_find_error_source(hdev)) 3182 hclge_handle_error_info_log(ae_dev); 3183 } 3184 3185 static bool 3186 hclge_handle_error_type_reg_log(struct hclge_dev *hdev, 3187 struct hclge_mod_err_info *mod_info, 3188 struct hclge_type_reg_err_info *type_reg_info) 3189 { 3190 #define HCLGE_ERR_TYPE_MASK 0x7F 3191 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7 3192 3193 u8 mod_id, total_module, type_id, total_type, i, is_ras; 3194 struct device *dev = &hdev->pdev->dev; 3195 u8 index_module = MODULE_NONE; 3196 u8 index_type = NONE_ERROR; 3197 bool cause_by_vf = false; 3198 3199 mod_id = mod_info->mod_id; 3200 type_id = type_reg_info->type_id & HCLGE_ERR_TYPE_MASK; 3201 is_ras = type_reg_info->type_id >> HCLGE_ERR_TYPE_IS_RAS_OFFSET; 3202 3203 total_module = ARRAY_SIZE(hclge_hw_module_id_st); 3204 total_type = ARRAY_SIZE(hclge_hw_type_id_st); 3205 3206 for (i = 0; i < total_module; i++) { 3207 if (mod_id == hclge_hw_module_id_st[i].module_id) { 3208 index_module = i; 3209 break; 3210 } 3211 } 3212 3213 for (i = 0; i < total_type; i++) { 3214 if (type_id == hclge_hw_type_id_st[i].type_id) { 3215 index_type = i; 3216 cause_by_vf = hclge_hw_type_id_st[i].cause_by_vf; 3217 break; 3218 } 3219 } 3220 3221 if (index_module != MODULE_NONE && index_type != NONE_ERROR) 3222 dev_err(dev, 3223 "found %s %s, is %s error.\n", 3224 hclge_hw_module_id_st[index_module].msg, 3225 hclge_hw_type_id_st[index_type].msg, 3226 is_ras ? "ras" : "msix"); 3227 else 3228 dev_err(dev, 3229 "unknown module[%u] or type[%u].\n", mod_id, type_id); 3230 3231 dev_err(dev, "reg_value:\n"); 3232 for (i = 0; i < type_reg_info->reg_num; i++) 3233 dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]); 3234 3235 if (hclge_hw_module_id_st[index_module].query_reg_info) 3236 hclge_hw_module_id_st[index_module].query_reg_info(hdev); 3237 3238 return cause_by_vf; 3239 } 3240 3241 static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev, 3242 const u32 *buf, u32 buf_size) 3243 { 3244 struct hclge_type_reg_err_info *type_reg_info; 3245 struct hclge_dev *hdev = ae_dev->priv; 3246 struct device *dev = &hdev->pdev->dev; 3247 struct hclge_mod_err_info *mod_info; 3248 struct hclge_sum_err_info *sum_info; 3249 bool cause_by_vf = false; 3250 u8 mod_num, err_num, i; 3251 u32 offset = 0; 3252 3253 sum_info = (struct hclge_sum_err_info *)&buf[offset++]; 3254 if (sum_info->reset_type && 3255 sum_info->reset_type != HNAE3_NONE_RESET) 3256 set_bit(sum_info->reset_type, &ae_dev->hw_err_reset_req); 3257 mod_num = sum_info->mod_num; 3258 3259 while (mod_num--) { 3260 if (offset >= buf_size) { 3261 dev_err(dev, "The offset(%u) exceeds buf's size(%u).\n", 3262 offset, buf_size); 3263 return; 3264 } 3265 mod_info = (struct hclge_mod_err_info *)&buf[offset++]; 3266 err_num = mod_info->err_num; 3267 3268 for (i = 0; i < err_num; i++) { 3269 if (offset >= buf_size) { 3270 dev_err(dev, 3271 "The offset(%u) exceeds buf size(%u).\n", 3272 offset, buf_size); 3273 return; 3274 } 3275 3276 type_reg_info = (struct hclge_type_reg_err_info *) 3277 &buf[offset++]; 3278 if (hclge_handle_error_type_reg_log(hdev, mod_info, 3279 type_reg_info)) 3280 cause_by_vf = true; 3281 3282 offset += type_reg_info->reg_num; 3283 } 3284 } 3285 3286 if (hnae3_ae_dev_vf_fault_supported(hdev->ae_dev) && cause_by_vf) 3287 set_bit(HNAE3_VF_EXP_RESET, &ae_dev->hw_err_reset_req); 3288 } 3289 3290 static int hclge_query_all_err_bd_num(struct hclge_dev *hdev, u32 *bd_num) 3291 { 3292 struct device *dev = &hdev->pdev->dev; 3293 struct hclge_desc desc_bd; 3294 int ret; 3295 3296 hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_ALL_ERR_BD_NUM, true); 3297 ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); 3298 if (ret) { 3299 dev_err(dev, "failed to query error bd_num, ret = %d.\n", ret); 3300 return ret; 3301 } 3302 3303 *bd_num = le32_to_cpu(desc_bd.data[0]); 3304 if (!(*bd_num)) { 3305 dev_err(dev, "The value of bd_num is 0!\n"); 3306 return -EINVAL; 3307 } 3308 3309 return 0; 3310 } 3311 3312 static int hclge_query_all_err_info(struct hclge_dev *hdev, 3313 struct hclge_desc *desc, u32 bd_num) 3314 { 3315 struct device *dev = &hdev->pdev->dev; 3316 int ret; 3317 3318 hclge_cmd_setup_basic_desc(desc, HCLGE_QUERY_ALL_ERR_INFO, true); 3319 ret = hclge_cmd_send(&hdev->hw, desc, bd_num); 3320 if (ret) 3321 dev_err(dev, "failed to query error info, ret = %d.\n", ret); 3322 3323 return ret; 3324 } 3325 3326 int hclge_handle_error_info_log(struct hnae3_ae_dev *ae_dev) 3327 { 3328 u32 bd_num, desc_len, buf_len, buf_size, i; 3329 struct hclge_dev *hdev = ae_dev->priv; 3330 struct hclge_desc *desc; 3331 __le32 *desc_data; 3332 u32 *buf; 3333 int ret; 3334 3335 ret = hclge_query_all_err_bd_num(hdev, &bd_num); 3336 if (ret) 3337 goto out; 3338 3339 desc_len = bd_num * sizeof(struct hclge_desc); 3340 desc = kzalloc(desc_len, GFP_KERNEL); 3341 if (!desc) { 3342 ret = -ENOMEM; 3343 goto out; 3344 } 3345 3346 ret = hclge_query_all_err_info(hdev, desc, bd_num); 3347 if (ret) 3348 goto err_desc; 3349 3350 buf_len = bd_num * sizeof(struct hclge_desc) - HCLGE_DESC_NO_DATA_LEN; 3351 buf_size = buf_len / sizeof(u32); 3352 3353 desc_data = kzalloc(buf_len, GFP_KERNEL); 3354 if (!desc_data) { 3355 ret = -ENOMEM; 3356 goto err_desc; 3357 } 3358 3359 buf = kzalloc(buf_len, GFP_KERNEL); 3360 if (!buf) { 3361 ret = -ENOMEM; 3362 goto err_buf_alloc; 3363 } 3364 3365 memcpy(desc_data, &desc[0].data[0], buf_len); 3366 for (i = 0; i < buf_size; i++) 3367 buf[i] = le32_to_cpu(desc_data[i]); 3368 3369 hclge_handle_error_module_log(ae_dev, buf, buf_size); 3370 kfree(buf); 3371 3372 err_buf_alloc: 3373 kfree(desc_data); 3374 err_desc: 3375 kfree(desc); 3376 out: 3377 return ret; 3378 } 3379 3380 static bool hclge_reset_vf_in_bitmap(struct hclge_dev *hdev, 3381 unsigned long *bitmap) 3382 { 3383 struct hclge_vport *vport; 3384 bool exist_set = false; 3385 int func_id; 3386 int ret; 3387 3388 func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM); 3389 if (func_id == PF_VPORT_ID) 3390 return false; 3391 3392 while (func_id != HCLGE_VPORT_NUM) { 3393 vport = hclge_get_vf_vport(hdev, 3394 func_id - HCLGE_VF_VPORT_START_NUM); 3395 if (!vport) { 3396 dev_err(&hdev->pdev->dev, "invalid func id(%d)\n", 3397 func_id); 3398 return false; 3399 } 3400 3401 dev_info(&hdev->pdev->dev, "do function %d recovery.", func_id); 3402 3403 ret = hclge_reset_tqp(&vport->nic); 3404 if (ret) { 3405 dev_err(&hdev->pdev->dev, 3406 "failed to reset tqp, ret = %d.", ret); 3407 return false; 3408 } 3409 3410 ret = hclge_inform_vf_reset(vport, HNAE3_VF_FUNC_RESET); 3411 if (ret) { 3412 dev_err(&hdev->pdev->dev, 3413 "failed to reset func %d, ret = %d.", 3414 func_id, ret); 3415 return false; 3416 } 3417 3418 exist_set = true; 3419 clear_bit(func_id, bitmap); 3420 func_id = find_first_bit(bitmap, HCLGE_VPORT_NUM); 3421 } 3422 3423 return exist_set; 3424 } 3425 3426 static void hclge_get_vf_fault_bitmap(struct hclge_desc *desc, 3427 unsigned long *bitmap) 3428 { 3429 #define HCLGE_FIR_FAULT_BYTES 24 3430 #define HCLGE_SEC_FAULT_BYTES 8 3431 3432 u8 *buff; 3433 3434 BUILD_BUG_ON(HCLGE_FIR_FAULT_BYTES + HCLGE_SEC_FAULT_BYTES != 3435 BITS_TO_BYTES(HCLGE_VPORT_NUM)); 3436 3437 memcpy(bitmap, desc[0].data, HCLGE_FIR_FAULT_BYTES); 3438 buff = (u8 *)bitmap + HCLGE_FIR_FAULT_BYTES; 3439 memcpy(buff, desc[1].data, HCLGE_SEC_FAULT_BYTES); 3440 } 3441 3442 int hclge_handle_vf_queue_err_ras(struct hclge_dev *hdev) 3443 { 3444 unsigned long vf_fault_bitmap[BITS_TO_LONGS(HCLGE_VPORT_NUM)]; 3445 struct hclge_desc desc[2]; 3446 bool cause_by_vf = false; 3447 int ret; 3448 3449 if (!test_and_clear_bit(HNAE3_VF_EXP_RESET, 3450 &hdev->ae_dev->hw_err_reset_req) || 3451 !hnae3_ae_dev_vf_fault_supported(hdev->ae_dev)) 3452 return 0; 3453 3454 hclge_comm_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_GET_QUEUE_ERR_VF, 3455 true); 3456 desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT); 3457 hclge_comm_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_GET_QUEUE_ERR_VF, 3458 true); 3459 3460 ret = hclge_comm_cmd_send(&hdev->hw.hw, desc, 2); 3461 if (ret) { 3462 dev_err(&hdev->pdev->dev, 3463 "failed to get vf bitmap, ret = %d.\n", ret); 3464 return ret; 3465 } 3466 hclge_get_vf_fault_bitmap(desc, vf_fault_bitmap); 3467 3468 cause_by_vf = hclge_reset_vf_in_bitmap(hdev, vf_fault_bitmap); 3469 if (cause_by_vf) 3470 hdev->ae_dev->hw_err_reset_req = 0; 3471 3472 return 0; 3473 } 3474