// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2016-2017 Hisilicon Limited. */

#include "hclge_err.h"

static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(17), .msg = "imp_itcm4_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(17), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(19), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(21), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(23), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(25), .msg = "cmdq_rocee_rx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(27), .msg = "cmdq_rocee_tx_head_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(29), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(31), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
	{ .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};
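
/*
 * Note: the error tables in this file are sentinel-terminated (an entry
 * with a NULL .msg ends the walk) and are consumed by hclge_log_error()
 * below, which tests each .int_msk as a bit mask against the reported
 * status word and requests the entry's .reset_level on a match:
 *
 *	while (err->msg) {
 *		if (err->int_msk & err_sts)
 *			... log and set_bit(err->reset_level, ...) ...
 *		err++;
 *	}
 */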

static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = {
	{ .int_msk = BIT(1), .msg = "msix_nic_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(3), .msg = "msix_rocee_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_igu_int[] = {
	{ .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = {
	{ .int_msk = BIT(0), .msg = "rx_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "rx_stp_fifo_underflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "tx_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tx_buf_underrun",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "rx_stp_buf_overflow",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ncsi_err_int[] = {
	{ .int_msk = BIT(1), .msg = "ncsi_tx_ecc_mbit_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st1[] = {
	{ .int_msk = BIT(0), .msg = "vf_vlan_ad_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "umv_mcast_group_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "umv_key_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "umv_key_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "umv_key_mem2_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "umv_key_mem3_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "umv_ad_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "rss_tc_mode_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "rss_idt_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "rss_idt_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "rss_idt_mem2_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "rss_idt_mem3_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "rss_idt_mem4_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "rss_idt_mem5_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "rss_idt_mem6_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "rss_idt_mem7_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "rss_idt_mem8_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "rss_idt_mem9_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "rss_idt_mem10_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "rss_idt_mem11_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "rss_idt_mem12_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "rss_idt_mem13_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "rss_idt_mem14_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "rss_idt_mem15_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(24), .msg = "port_vlan_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(25), .msg = "mcast_linear_table_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(26), .msg = "mcast_result_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(27), .msg = "flow_director_ad_mem0_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(28), .msg = "flow_director_ad_mem1_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(29), .msg = "rx_vlan_tag_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(30), .msg = "Tx_UP_mapping_config_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_pf_abnormal_int[] = {
	{ .int_msk = BIT(0), .msg = "tx_vlan_tag_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ .int_msk = BIT(1), .msg = "rss_list_tc_unassigned_queue_err",
	  .reset_level = HNAE3_NONE_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ppp_mpf_abnormal_int_st3[] = {
	{ .int_msk = BIT(0), .msg = "hfs_fifo_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "rslt_descr_fifo_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "tx_vlan_tag_mem_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "FD_CN0_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "FD_CN1_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "GRO_AD_memory_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_tm_sch_rint[] = {
	{ .int_msk = BIT(1), .msg = "tm_sch_ecc_mbit_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "tm_sch_port_shap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "tm_sch_port_shap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tm_sch_pg_pshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "tm_sch_pg_pshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "tm_sch_pg_cshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "tm_sch_pg_cshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "tm_sch_pri_pshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "tm_sch_pri_pshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "tm_sch_pri_cshap_sub_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "tm_sch_pri_cshap_sub_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "tm_sch_port_shap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "tm_sch_port_shap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "tm_sch_pg_pshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(15), .msg = "tm_sch_pg_pshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(16), .msg = "tm_sch_pg_cshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(17), .msg = "tm_sch_pg_cshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(18), .msg = "tm_sch_pri_pshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(19), .msg = "tm_sch_pri_pshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(20), .msg = "tm_sch_pri_cshap_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(21), .msg = "tm_sch_pri_cshap_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(22), .msg = "tm_sch_rq_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(23), .msg = "tm_sch_rq_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(24), .msg = "tm_sch_nq_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(25), .msg = "tm_sch_nq_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(26), .msg = "tm_sch_roce_up_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(27), .msg = "tm_sch_roce_up_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(28), .msg = "tm_sch_rcb_byte_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(29), .msg = "tm_sch_rcb_byte_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(30), .msg = "tm_sch_ssu_byte_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(31), .msg = "tm_sch_ssu_byte_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_qcn_fifo_rint[] = {
	{ .int_msk = BIT(0), .msg = "qcn_shap_gp0_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(1), .msg = "qcn_shap_gp0_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "qcn_shap_gp1_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "qcn_shap_gp1_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "qcn_shap_gp2_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "qcn_shap_gp2_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "qcn_shap_gp3_sch_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "qcn_shap_gp3_sch_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "qcn_shap_gp0_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "qcn_shap_gp0_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(10), .msg = "qcn_shap_gp1_offset_fifo_rd_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "qcn_shap_gp1_offset_fifo_wr_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
"qcn_shap_gp2_offset_fifo_rd_err", 300 .reset_level = HNAE3_GLOBAL_RESET }, 301 { .int_msk = BIT(13), .msg = "qcn_shap_gp2_offset_fifo_wr_err", 302 .reset_level = HNAE3_GLOBAL_RESET }, 303 { .int_msk = BIT(14), .msg = "qcn_shap_gp3_offset_fifo_rd_err", 304 .reset_level = HNAE3_GLOBAL_RESET }, 305 { .int_msk = BIT(15), .msg = "qcn_shap_gp3_offset_fifo_wr_err", 306 .reset_level = HNAE3_GLOBAL_RESET }, 307 { .int_msk = BIT(16), .msg = "qcn_byte_info_fifo_rd_err", 308 .reset_level = HNAE3_GLOBAL_RESET }, 309 { .int_msk = BIT(17), .msg = "qcn_byte_info_fifo_wr_err", 310 .reset_level = HNAE3_GLOBAL_RESET }, 311 { /* sentinel */ } 312 }; 313 314 static const struct hclge_hw_error hclge_qcn_ecc_rint[] = { 315 { .int_msk = BIT(1), .msg = "qcn_byte_mem_ecc_mbit_err", 316 .reset_level = HNAE3_GLOBAL_RESET }, 317 { .int_msk = BIT(3), .msg = "qcn_time_mem_ecc_mbit_err", 318 .reset_level = HNAE3_GLOBAL_RESET }, 319 { .int_msk = BIT(5), .msg = "qcn_fb_mem_ecc_mbit_err", 320 .reset_level = HNAE3_GLOBAL_RESET }, 321 { .int_msk = BIT(7), .msg = "qcn_link_mem_ecc_mbit_err", 322 .reset_level = HNAE3_GLOBAL_RESET }, 323 { .int_msk = BIT(9), .msg = "qcn_rate_mem_ecc_mbit_err", 324 .reset_level = HNAE3_GLOBAL_RESET }, 325 { .int_msk = BIT(11), .msg = "qcn_tmplt_mem_ecc_mbit_err", 326 .reset_level = HNAE3_GLOBAL_RESET }, 327 { .int_msk = BIT(13), .msg = "qcn_shap_cfg_mem_ecc_mbit_err", 328 .reset_level = HNAE3_GLOBAL_RESET }, 329 { .int_msk = BIT(15), .msg = "qcn_gp0_barrel_mem_ecc_mbit_err", 330 .reset_level = HNAE3_GLOBAL_RESET }, 331 { .int_msk = BIT(17), .msg = "qcn_gp1_barrel_mem_ecc_mbit_err", 332 .reset_level = HNAE3_GLOBAL_RESET }, 333 { .int_msk = BIT(19), .msg = "qcn_gp2_barrel_mem_ecc_mbit_err", 334 .reset_level = HNAE3_GLOBAL_RESET }, 335 { .int_msk = BIT(21), .msg = "qcn_gp3_barral_mem_ecc_mbit_err", 336 .reset_level = HNAE3_GLOBAL_RESET }, 337 { /* sentinel */ } 338 }; 339 340 static const struct hclge_hw_error hclge_mac_afifo_tnl_int[] = { 341 { .int_msk = BIT(0), .msg = "egu_cge_afifo_ecc_1bit_err", 342 .reset_level = HNAE3_NONE_RESET }, 343 { .int_msk = BIT(1), .msg = "egu_cge_afifo_ecc_mbit_err", 344 .reset_level = HNAE3_GLOBAL_RESET }, 345 { .int_msk = BIT(2), .msg = "egu_lge_afifo_ecc_1bit_err", 346 .reset_level = HNAE3_NONE_RESET }, 347 { .int_msk = BIT(3), .msg = "egu_lge_afifo_ecc_mbit_err", 348 .reset_level = HNAE3_GLOBAL_RESET }, 349 { .int_msk = BIT(4), .msg = "cge_igu_afifo_ecc_1bit_err", 350 .reset_level = HNAE3_NONE_RESET }, 351 { .int_msk = BIT(5), .msg = "cge_igu_afifo_ecc_mbit_err", 352 .reset_level = HNAE3_GLOBAL_RESET }, 353 { .int_msk = BIT(6), .msg = "lge_igu_afifo_ecc_1bit_err", 354 .reset_level = HNAE3_NONE_RESET }, 355 { .int_msk = BIT(7), .msg = "lge_igu_afifo_ecc_mbit_err", 356 .reset_level = HNAE3_GLOBAL_RESET }, 357 { .int_msk = BIT(8), .msg = "cge_igu_afifo_overflow_err", 358 .reset_level = HNAE3_GLOBAL_RESET }, 359 { .int_msk = BIT(9), .msg = "lge_igu_afifo_overflow_err", 360 .reset_level = HNAE3_GLOBAL_RESET }, 361 { .int_msk = BIT(10), .msg = "egu_cge_afifo_underrun_err", 362 .reset_level = HNAE3_GLOBAL_RESET }, 363 { .int_msk = BIT(11), .msg = "egu_lge_afifo_underrun_err", 364 .reset_level = HNAE3_GLOBAL_RESET }, 365 { .int_msk = BIT(12), .msg = "egu_ge_afifo_underrun_err", 366 .reset_level = HNAE3_GLOBAL_RESET }, 367 { .int_msk = BIT(13), .msg = "ge_igu_afifo_overflow_err", 368 .reset_level = HNAE3_GLOBAL_RESET }, 369 { /* sentinel */ } 370 }; 371 372 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { 373 { .int_msk = BIT(13), .msg = 
"rpu_rx_pkt_bit32_ecc_mbit_err", 374 .reset_level = HNAE3_GLOBAL_RESET }, 375 { .int_msk = BIT(14), .msg = "rpu_rx_pkt_bit33_ecc_mbit_err", 376 .reset_level = HNAE3_GLOBAL_RESET }, 377 { .int_msk = BIT(15), .msg = "rpu_rx_pkt_bit34_ecc_mbit_err", 378 .reset_level = HNAE3_GLOBAL_RESET }, 379 { .int_msk = BIT(16), .msg = "rpu_rx_pkt_bit35_ecc_mbit_err", 380 .reset_level = HNAE3_GLOBAL_RESET }, 381 { .int_msk = BIT(17), .msg = "rcb_tx_ring_ecc_mbit_err", 382 .reset_level = HNAE3_GLOBAL_RESET }, 383 { .int_msk = BIT(18), .msg = "rcb_rx_ring_ecc_mbit_err", 384 .reset_level = HNAE3_GLOBAL_RESET }, 385 { .int_msk = BIT(19), .msg = "rcb_tx_fbd_ecc_mbit_err", 386 .reset_level = HNAE3_GLOBAL_RESET }, 387 { .int_msk = BIT(20), .msg = "rcb_rx_ebd_ecc_mbit_err", 388 .reset_level = HNAE3_GLOBAL_RESET }, 389 { .int_msk = BIT(21), .msg = "rcb_tso_info_ecc_mbit_err", 390 .reset_level = HNAE3_GLOBAL_RESET }, 391 { .int_msk = BIT(22), .msg = "rcb_tx_int_info_ecc_mbit_err", 392 .reset_level = HNAE3_GLOBAL_RESET }, 393 { .int_msk = BIT(23), .msg = "rcb_rx_int_info_ecc_mbit_err", 394 .reset_level = HNAE3_GLOBAL_RESET }, 395 { .int_msk = BIT(24), .msg = "tpu_tx_pkt_0_ecc_mbit_err", 396 .reset_level = HNAE3_GLOBAL_RESET }, 397 { .int_msk = BIT(25), .msg = "tpu_tx_pkt_1_ecc_mbit_err", 398 .reset_level = HNAE3_GLOBAL_RESET }, 399 { .int_msk = BIT(26), .msg = "rd_bus_err", 400 .reset_level = HNAE3_GLOBAL_RESET }, 401 { .int_msk = BIT(27), .msg = "wr_bus_err", 402 .reset_level = HNAE3_GLOBAL_RESET }, 403 { .int_msk = BIT(28), .msg = "reg_search_miss", 404 .reset_level = HNAE3_GLOBAL_RESET }, 405 { .int_msk = BIT(29), .msg = "rx_q_search_miss", 406 .reset_level = HNAE3_NONE_RESET }, 407 { .int_msk = BIT(30), .msg = "ooo_ecc_err_detect", 408 .reset_level = HNAE3_NONE_RESET }, 409 { .int_msk = BIT(31), .msg = "ooo_ecc_err_multpl", 410 .reset_level = HNAE3_GLOBAL_RESET }, 411 { /* sentinel */ } 412 }; 413 414 static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { 415 { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err", 416 .reset_level = HNAE3_GLOBAL_RESET }, 417 { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err", 418 .reset_level = HNAE3_GLOBAL_RESET }, 419 { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err", 420 .reset_level = HNAE3_GLOBAL_RESET }, 421 { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err", 422 .reset_level = HNAE3_GLOBAL_RESET }, 423 { /* sentinel */ } 424 }; 425 426 static const struct hclge_hw_error hclge_ppu_pf_abnormal_int[] = { 427 { .int_msk = BIT(0), .msg = "over_8bd_no_fe", 428 .reset_level = HNAE3_FUNC_RESET }, 429 { .int_msk = BIT(1), .msg = "tso_mss_cmp_min_err", 430 .reset_level = HNAE3_NONE_RESET }, 431 { .int_msk = BIT(2), .msg = "tso_mss_cmp_max_err", 432 .reset_level = HNAE3_NONE_RESET }, 433 { .int_msk = BIT(3), .msg = "tx_rd_fbd_poison", 434 .reset_level = HNAE3_FUNC_RESET }, 435 { .int_msk = BIT(4), .msg = "rx_rd_ebd_poison", 436 .reset_level = HNAE3_FUNC_RESET }, 437 { .int_msk = BIT(5), .msg = "buf_wait_timeout", 438 .reset_level = HNAE3_NONE_RESET }, 439 { /* sentinel */ } 440 }; 441 442 static const struct hclge_hw_error hclge_ssu_com_err_int[] = { 443 { .int_msk = BIT(0), .msg = "buf_sum_err", 444 .reset_level = HNAE3_NONE_RESET }, 445 { .int_msk = BIT(1), .msg = "ppp_mb_num_err", 446 .reset_level = HNAE3_NONE_RESET }, 447 { .int_msk = BIT(2), .msg = "ppp_mbid_err", 448 .reset_level = HNAE3_GLOBAL_RESET }, 449 { .int_msk = BIT(3), .msg = "ppp_rlt_mac_err", 450 .reset_level = HNAE3_GLOBAL_RESET }, 451 { .int_msk = BIT(4), .msg = "ppp_rlt_host_err", 452 
	{ .int_msk = BIT(5), .msg = "cks_edit_position_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "cks_edit_condition_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "vlan_edit_condition_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "vlan_num_ot_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(9), .msg = "vlan_num_in_err",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};

#define HCLGE_SSU_MEM_ECC_ERR(x) \
	{ .int_msk = BIT(x), .msg = "ssu_mem" #x "_ecc_mbit_err", \
	  .reset_level = HNAE3_GLOBAL_RESET }

static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
	HCLGE_SSU_MEM_ECC_ERR(0),
	HCLGE_SSU_MEM_ECC_ERR(1),
	HCLGE_SSU_MEM_ECC_ERR(2),
	HCLGE_SSU_MEM_ECC_ERR(3),
	HCLGE_SSU_MEM_ECC_ERR(4),
	HCLGE_SSU_MEM_ECC_ERR(5),
	HCLGE_SSU_MEM_ECC_ERR(6),
	HCLGE_SSU_MEM_ECC_ERR(7),
	HCLGE_SSU_MEM_ECC_ERR(8),
	HCLGE_SSU_MEM_ECC_ERR(9),
	HCLGE_SSU_MEM_ECC_ERR(10),
	HCLGE_SSU_MEM_ECC_ERR(11),
	HCLGE_SSU_MEM_ECC_ERR(12),
	HCLGE_SSU_MEM_ECC_ERR(13),
	HCLGE_SSU_MEM_ECC_ERR(14),
	HCLGE_SSU_MEM_ECC_ERR(15),
	HCLGE_SSU_MEM_ECC_ERR(16),
	HCLGE_SSU_MEM_ECC_ERR(17),
	HCLGE_SSU_MEM_ECC_ERR(18),
	HCLGE_SSU_MEM_ECC_ERR(19),
	HCLGE_SSU_MEM_ECC_ERR(20),
	HCLGE_SSU_MEM_ECC_ERR(21),
	HCLGE_SSU_MEM_ECC_ERR(22),
	HCLGE_SSU_MEM_ECC_ERR(23),
	HCLGE_SSU_MEM_ECC_ERR(24),
	HCLGE_SSU_MEM_ECC_ERR(25),
	HCLGE_SSU_MEM_ECC_ERR(26),
	HCLGE_SSU_MEM_ECC_ERR(27),
	HCLGE_SSU_MEM_ECC_ERR(28),
	HCLGE_SSU_MEM_ECC_ERR(29),
	HCLGE_SSU_MEM_ECC_ERR(30),
	HCLGE_SSU_MEM_ECC_ERR(31),
	{ /* sentinel */ }
};

static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
	{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
	  .reset_level = HNAE3_FUNC_RESET },
	{ .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(3), .msg = "roc_eof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(4), .msg = "tpu_eof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(5), .msg = "igu_eof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(6), .msg = "roc_sof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(7), .msg = "tpu_sof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(8), .msg = "igu_sof_mis_match_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(11), .msg = "ets_rd_int_rx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(12), .msg = "ets_wr_int_rx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(13), .msg = "ets_rd_int_tx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ .int_msk = BIT(14), .msg = "ets_wr_int_tx_port",
	  .reset_level = HNAE3_GLOBAL_RESET },
	{ /* sentinel */ }
};
"ig_host_data_fifo_int", 544 .reset_level = HNAE3_GLOBAL_RESET }, 545 { .int_msk = BIT(4), .msg = "ig_host_key_fifo_int", 546 .reset_level = HNAE3_GLOBAL_RESET }, 547 { .int_msk = BIT(5), .msg = "tx_qcn_fifo_int", 548 .reset_level = HNAE3_GLOBAL_RESET }, 549 { .int_msk = BIT(6), .msg = "rx_qcn_fifo_int", 550 .reset_level = HNAE3_GLOBAL_RESET }, 551 { .int_msk = BIT(7), .msg = "tx_pf_rd_fifo_int", 552 .reset_level = HNAE3_GLOBAL_RESET }, 553 { .int_msk = BIT(8), .msg = "rx_pf_rd_fifo_int", 554 .reset_level = HNAE3_GLOBAL_RESET }, 555 { .int_msk = BIT(9), .msg = "qm_eof_fifo_int", 556 .reset_level = HNAE3_GLOBAL_RESET }, 557 { .int_msk = BIT(10), .msg = "mb_rlt_fifo_int", 558 .reset_level = HNAE3_GLOBAL_RESET }, 559 { .int_msk = BIT(11), .msg = "dup_uncopy_fifo_int", 560 .reset_level = HNAE3_GLOBAL_RESET }, 561 { .int_msk = BIT(12), .msg = "dup_cnt_rd_fifo_int", 562 .reset_level = HNAE3_GLOBAL_RESET }, 563 { .int_msk = BIT(13), .msg = "dup_cnt_drop_fifo_int", 564 .reset_level = HNAE3_GLOBAL_RESET }, 565 { .int_msk = BIT(14), .msg = "dup_cnt_wrb_fifo_int", 566 .reset_level = HNAE3_GLOBAL_RESET }, 567 { .int_msk = BIT(15), .msg = "host_cmd_fifo_int", 568 .reset_level = HNAE3_GLOBAL_RESET }, 569 { .int_msk = BIT(16), .msg = "mac_cmd_fifo_int", 570 .reset_level = HNAE3_GLOBAL_RESET }, 571 { .int_msk = BIT(17), .msg = "host_cmd_bitmap_empty_int", 572 .reset_level = HNAE3_GLOBAL_RESET }, 573 { .int_msk = BIT(18), .msg = "mac_cmd_bitmap_empty_int", 574 .reset_level = HNAE3_GLOBAL_RESET }, 575 { .int_msk = BIT(19), .msg = "dup_bitmap_empty_int", 576 .reset_level = HNAE3_GLOBAL_RESET }, 577 { .int_msk = BIT(20), .msg = "out_queue_bitmap_empty_int", 578 .reset_level = HNAE3_GLOBAL_RESET }, 579 { .int_msk = BIT(21), .msg = "bank2_bitmap_empty_int", 580 .reset_level = HNAE3_GLOBAL_RESET }, 581 { .int_msk = BIT(22), .msg = "bank1_bitmap_empty_int", 582 .reset_level = HNAE3_GLOBAL_RESET }, 583 { .int_msk = BIT(23), .msg = "bank0_bitmap_empty_int", 584 .reset_level = HNAE3_GLOBAL_RESET }, 585 { /* sentinel */ } 586 }; 587 588 static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = { 589 { .int_msk = BIT(0), .msg = "ets_rd_int_rx_tcg", 590 .reset_level = HNAE3_GLOBAL_RESET }, 591 { .int_msk = BIT(1), .msg = "ets_wr_int_rx_tcg", 592 .reset_level = HNAE3_GLOBAL_RESET }, 593 { .int_msk = BIT(2), .msg = "ets_rd_int_tx_tcg", 594 .reset_level = HNAE3_GLOBAL_RESET }, 595 { .int_msk = BIT(3), .msg = "ets_wr_int_tx_tcg", 596 .reset_level = HNAE3_GLOBAL_RESET }, 597 { /* sentinel */ } 598 }; 599 600 static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = { 601 { .int_msk = BIT(0), .msg = "roc_pkt_without_key_port", 602 .reset_level = HNAE3_FUNC_RESET }, 603 { .int_msk = BIT(9), .msg = "low_water_line_err_port", 604 .reset_level = HNAE3_NONE_RESET }, 605 { .int_msk = BIT(10), .msg = "hi_water_line_err_port", 606 .reset_level = HNAE3_GLOBAL_RESET }, 607 { /* sentinel */ } 608 }; 609 610 static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { 611 { .int_msk = 0, .msg = "rocee qmm ovf: sgid invalid err" }, 612 { .int_msk = 0x4, .msg = "rocee qmm ovf: sgid ovf err" }, 613 { .int_msk = 0x8, .msg = "rocee qmm ovf: smac invalid err" }, 614 { .int_msk = 0xC, .msg = "rocee qmm ovf: smac ovf err" }, 615 { .int_msk = 0x10, .msg = "rocee qmm ovf: cqc invalid err" }, 616 { .int_msk = 0x11, .msg = "rocee qmm ovf: cqc ovf err" }, 617 { .int_msk = 0x12, .msg = "rocee qmm ovf: cqc hopnum err" }, 618 { .int_msk = 0x13, .msg = "rocee qmm ovf: cqc ba0 err" }, 619 { .int_msk = 0x14, .msg = "rocee qmm ovf: srqc 
invalid err" }, 620 { .int_msk = 0x15, .msg = "rocee qmm ovf: srqc ovf err" }, 621 { .int_msk = 0x16, .msg = "rocee qmm ovf: srqc hopnum err" }, 622 { .int_msk = 0x17, .msg = "rocee qmm ovf: srqc ba0 err" }, 623 { .int_msk = 0x18, .msg = "rocee qmm ovf: mpt invalid err" }, 624 { .int_msk = 0x19, .msg = "rocee qmm ovf: mpt ovf err" }, 625 { .int_msk = 0x1A, .msg = "rocee qmm ovf: mpt hopnum err" }, 626 { .int_msk = 0x1B, .msg = "rocee qmm ovf: mpt ba0 err" }, 627 { .int_msk = 0x1C, .msg = "rocee qmm ovf: qpc invalid err" }, 628 { .int_msk = 0x1D, .msg = "rocee qmm ovf: qpc ovf err" }, 629 { .int_msk = 0x1E, .msg = "rocee qmm ovf: qpc hopnum err" }, 630 { .int_msk = 0x1F, .msg = "rocee qmm ovf: qpc ba0 err" }, 631 { /* sentinel */ } 632 }; 633 634 static void hclge_log_error(struct device *dev, char *reg, 635 const struct hclge_hw_error *err, 636 u32 err_sts, unsigned long *reset_requests) 637 { 638 while (err->msg) { 639 if (err->int_msk & err_sts) { 640 dev_err(dev, "%s %s found [error status=0x%x]\n", 641 reg, err->msg, err_sts); 642 if (err->reset_level && 643 err->reset_level != HNAE3_NONE_RESET) 644 set_bit(err->reset_level, reset_requests); 645 } 646 err++; 647 } 648 } 649 650 /* hclge_cmd_query_error: read the error information 651 * @hdev: pointer to struct hclge_dev 652 * @desc: descriptor for describing the command 653 * @cmd: command opcode 654 * @flag: flag for extended command structure 655 * 656 * This function query the error info from hw register/s using command 657 */ 658 static int hclge_cmd_query_error(struct hclge_dev *hdev, 659 struct hclge_desc *desc, u32 cmd, u16 flag) 660 { 661 struct device *dev = &hdev->pdev->dev; 662 int desc_num = 1; 663 int ret; 664 665 hclge_cmd_setup_basic_desc(&desc[0], cmd, true); 666 if (flag) { 667 desc[0].flag |= cpu_to_le16(flag); 668 hclge_cmd_setup_basic_desc(&desc[1], cmd, true); 669 desc_num = 2; 670 } 671 672 ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); 673 if (ret) 674 dev_err(dev, "query error cmd failed (%d)\n", ret); 675 676 return ret; 677 } 678 679 static int hclge_clear_mac_tnl_int(struct hclge_dev *hdev) 680 { 681 struct hclge_desc desc; 682 683 hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_MAC_TNL_INT, false); 684 desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_CLR); 685 686 return hclge_cmd_send(&hdev->hw, &desc, 1); 687 } 688 689 static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en) 690 { 691 struct device *dev = &hdev->pdev->dev; 692 struct hclge_desc desc[2]; 693 int ret; 694 695 /* configure common error interrupts */ 696 hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false); 697 desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); 698 hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false); 699 700 if (en) { 701 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN); 702 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN | 703 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN); 704 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN); 705 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN | 706 HCLGE_MSIX_SRAM_ECC_ERR_INT_EN); 707 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN); 708 } 709 710 desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK); 711 desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK | 712 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK); 713 desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK); 714 desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK | 715 

static int hclge_config_common_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure common error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);

	if (en) {
		desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
					      HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN |
					      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN);
		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
				      HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK |
				      HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK);
	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure common err interrupts\n", ret);

	return ret;
}

static int hclge_config_ncsi_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2)
		return 0;

	/* configure NCSI error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_NCSI_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure NCSI error interrupts\n", ret);

	return ret;
}

static int hclge_config_igu_egu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure IGU, EGU error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure IGU common interrupts\n", ret);
		return ret;
	}

	hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_EGU_TNL_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure IGU-EGU TNL interrupts\n", ret);
		return ret;
	}

	ret = hclge_config_ncsi_hw_err_int(hdev, en);

	return ret;
}

static int hclge_config_ppp_error_interrupt(struct hclge_dev *hdev, u32 cmd,
					    bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure PPP error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], cmd, false);

	if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN);
			desc[0].data[4] = cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT0_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT1_EN_MASK);
		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
			desc[1].data[2] =
				cpu_to_le32(HCLGE_PPP_PF_ERR_INT_EN_MASK);
	} else if (cmd == HCLGE_PPP_CMD1_INT_CMD) {
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT2_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPP_MPF_ECC_ERR_INT3_EN_MASK);
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev, "fail(%d) to configure PPP error intr\n", ret);

	return ret;
}
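
/*
 * PPP error enables span two commands; the wrapper below simply issues
 * HCLGE_PPP_CMD0_INT_CMD and then HCLGE_PPP_CMD1_INT_CMD with the same
 * enable state.
 */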

static int hclge_config_ppp_hw_err_int(struct hclge_dev *hdev, bool en)
{
	int ret;

	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD0_INT_CMD,
					       en);
	if (ret)
		return ret;

	ret = hclge_config_ppp_error_interrupt(hdev, HCLGE_PPP_CMD1_INT_CMD,
					       en);

	return ret;
}

static int hclge_config_tm_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure TM SCH hw errors */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_TM_SCH_ECC_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_TM_SCH_ECC_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret) {
		dev_err(dev, "fail(%d) to configure TM SCH errors\n", ret);
		return ret;
	}

	/* configure TM QCN hw errors */
	ret = hclge_cmd_query_error(hdev, &desc, HCLGE_TM_QCN_MEM_INT_CFG, 0);
	if (ret) {
		dev_err(dev, "fail(%d) to read TM QCN CFG status\n", ret);
		return ret;
	}

	hclge_cmd_reuse_desc(&desc, false);
	if (en)
		desc.data[1] = cpu_to_le32(HCLGE_TM_QCN_MEM_ERR_INT_EN);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure TM QCN mem errors\n", ret);

	return ret;
}

static int hclge_config_mac_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	/* configure MAC common error interrupts */
	hclge_cmd_setup_basic_desc(&desc, HCLGE_MAC_COMMON_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN);

	desc.data[1] = cpu_to_le32(HCLGE_MAC_COMMON_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure MAC COMMON error intr\n", ret);

	return ret;
}

int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en)
{
	struct hclge_desc desc;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_TNL_INT_EN, false);
	if (en)
		desc.data[0] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN);
	else
		desc.data[0] = 0;

	desc.data[1] = cpu_to_le32(HCLGE_MAC_TNL_INT_EN_MASK);

	return hclge_cmd_send(&hdev->hw, &desc, 1);
}
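
/*
 * PPU enables are split across three commands with different shapes:
 * HCLGE_PPU_MPF_ECC_INT_CMD needs a two-descriptor chain, while the
 * MPF "other" and PF "other" commands fit in a single descriptor; any
 * other opcode is rejected with -EINVAL.
 */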

static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd,
					     bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int desc_num = 1;
	int ret;

	/* configure PPU error interrupts */
	if (cmd == HCLGE_PPU_MPF_ECC_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
		hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
		if (en) {
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN);
			desc[0].data[1] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN);
			desc[1].data[3] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN);
			desc[1].data[4] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN);
		}

		desc[1].data[0] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK);
		desc[1].data[1] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK);
		desc[1].data[2] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK);
		desc[1].data[3] |=
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK);
		desc_num = 2;
	} else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (en)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2);

		desc[0].data[2] =
			cpu_to_le32(HCLGE_PPU_MPF_ABNORMAL_INT2_EN2_MASK);
	} else if (cmd == HCLGE_PPU_PF_OTHER_INT_CMD) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (en)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN);

		desc[0].data[2] =
			cpu_to_le32(HCLGE_PPU_PF_ABNORMAL_INT_EN_MASK);
	} else {
		dev_err(dev, "Invalid cmd to configure PPU error interrupts\n");
		return -EINVAL;
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num);

	return ret;
}

static int hclge_config_ppu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	int ret;

	ret = hclge_config_ppu_error_interrupts(hdev, HCLGE_PPU_MPF_ECC_INT_CMD,
						en);
	if (ret) {
		dev_err(dev, "fail(%d) to configure PPU MPF ECC error intr\n",
			ret);
		return ret;
	}

	ret = hclge_config_ppu_error_interrupts(hdev,
						HCLGE_PPU_MPF_OTHER_INT_CMD,
						en);
	if (ret) {
		dev_err(dev, "fail(%d) to configure PPU MPF other intr\n", ret);
		return ret;
	}

	ret = hclge_config_ppu_error_interrupts(hdev,
						HCLGE_PPU_PF_OTHER_INT_CMD, en);
	if (ret)
		dev_err(dev, "fail(%d) to configure PPU PF error interrupts\n",
			ret);
	return ret;
}

static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* configure SSU ecc error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_ECC_INT_CMD, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_ECC_INT_CMD, false);
	if (en) {
		desc[0].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN);
		desc[0].data[1] =
			cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_1BIT_ECC_ERR_INT_EN_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_MULTI_BIT_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_SSU_BIT32_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret) {
		dev_err(dev,
			"fail(%d) to configure SSU ECC error interrupt\n", ret);
		return ret;
	}

	/* configure SSU common error interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_SSU_COMMON_INT_CMD, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_SSU_COMMON_INT_CMD, false);

	if (en) {
		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
			desc[0].data[0] =
				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN);
		else
			desc[0].data[0] =
				cpu_to_le32(HCLGE_SSU_COMMON_INT_EN & ~BIT(5));
		desc[0].data[1] = cpu_to_le32(HCLGE_SSU_PORT_BASED_ERR_INT_EN);
		desc[0].data[2] =
			cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN);
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_SSU_COMMON_INT_EN_MASK |
				      HCLGE_SSU_PORT_BASED_ERR_INT_EN_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_SSU_FIFO_OVERFLOW_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"fail(%d) to configure SSU COMMON error intr\n", ret);

	return ret;
}
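
/*
 * The RAS and MSI-X status queries below are variable-length: firmware
 * first reports how many buffer descriptors each status read needs
 * (in data[0]/data[1] of the query below), and
 * hclge_handle_all_ras_errors() allocates max(mpf_bd_num, pf_bd_num)
 * descriptors and reuses the array for both queries.
 */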

/* hclge_query_bd_num: query number of buffer descriptors
 * @hdev: pointer to struct hclge_dev
 * @is_ras: true for ras, false for msix
 * @mpf_bd_num: number of main PF interrupt buffer descriptors
 * @pf_bd_num: number of non-main PF interrupt buffer descriptors
 *
 * This function queries the number of mpf and pf buffer descriptors.
 */
static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras,
			      u32 *mpf_bd_num, u32 *pf_bd_num)
{
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_min_bd_num, pf_min_bd_num;
	enum hclge_opcode_type opcode;
	struct hclge_desc desc_bd;
	int ret;

	if (is_ras) {
		opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM;
		mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM;
		pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM;
	} else {
		opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM;
		mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM;
		pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM;
	}

	hclge_cmd_setup_basic_desc(&desc_bd, opcode, true);
	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
	if (ret) {
		dev_err(dev, "fail(%d) to query msix int status bd num\n",
			ret);
		return ret;
	}

	*mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
	*pf_bd_num = le32_to_cpu(desc_bd.data[1]);
	if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) {
		dev_err(dev, "Invalid bd num: mpf(%u), pf(%u)\n",
			*mpf_bd_num, *pf_bd_num);
		return -EINVAL;
	}

	return 0;
}
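
/*
 * Layout assumed by hclge_handle_mpf_ras_error() below: the status
 * registers of the individual blocks are concatenated across the
 * returned descriptors, e.g. desc[0] holds the IMP/CMDQ/TQP/MSIX words,
 * desc[2] SSU, desc[3] IGU, desc[4] PPP, desc[5] PPU, desc[6] TM,
 * desc[7] QCN and desc[9] NCSI.
 */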

/* hclge_handle_mpf_ras_error: handle all main PF RAS errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @num: number of extended command structures
 *
 * This function handles all the main PF RAS errors in the
 * hw registers using the command queue.
 */
static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev,
				      struct hclge_desc *desc,
				      int num)
{
	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all main PF RAS errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret) {
		dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log HNS common errors */
	status = le32_to_cpu(desc[0].data[0]);
	if (status)
		hclge_log_error(dev, "IMP_TCM_ECC_INT_STS",
				&hclge_imp_tcm_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[1]);
	if (status)
		hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS",
				&hclge_cmdq_nic_mem_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	if ((le32_to_cpu(desc[0].data[2])) & BIT(0))
		dev_warn(dev, "imp_rd_data_poison_err found\n");

	status = le32_to_cpu(desc[0].data[3]);
	if (status)
		hclge_log_error(dev, "TQP_INT_ECC_INT_STS",
				&hclge_tqp_int_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[4]);
	if (status)
		hclge_log_error(dev, "MSIX_ECC_INT_STS",
				&hclge_msix_sram_ecc_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log SSU(Storage Switch Unit) errors */
	desc_data = (__le32 *)&desc[2];
	status = le32_to_cpu(*(desc_data + 2));
	if (status)
		hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0",
				&hclge_ssu_mem_ecc_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & BIT(0);
	if (status) {
		dev_err(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n",
			status);
		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
	}

	status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK;
	if (status)
		hclge_log_error(dev, "SSU_COMMON_ERR_INT",
				&hclge_ssu_com_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log IGU(Ingress Unit) errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK;
	if (status)
		hclge_log_error(dev, "IGU_INT_STS",
				&hclge_igu_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPP(Programmable Packet Process) errors */
	desc_data = (__le32 *)&desc[4];
	status = le32_to_cpu(*(desc_data + 1));
	if (status)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1",
				&hclge_ppp_mpf_abnormal_int_st1[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK;
	if (status)
		hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3",
				&hclge_ppp_mpf_abnormal_int_st3[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPU(RCB) errors */
	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 1));
	if (status) {
		dev_err(dev,
			"PPU_MPF_ABNORMAL_INT_ST1 rpu_rx_pkt_ecc_mbit_err found\n");
		set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req);
	}

	status = le32_to_cpu(*(desc_data + 2));
	if (status)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
				&hclge_ppu_mpf_abnormal_int_st2[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK;
	if (status)
		hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3",
				&hclge_ppu_mpf_abnormal_int_st3[0], status,
				&ae_dev->hw_err_reset_req);

	/* log TM(Traffic Manager) errors */
	desc_data = (__le32 *)&desc[6];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "TM_SCH_RINT",
				&hclge_tm_sch_rint[0], status,
				&ae_dev->hw_err_reset_req);

	/* log QCN(Quantized Congestion Control) errors */
	desc_data = (__le32 *)&desc[7];
	status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK;
	if (status)
		hclge_log_error(dev, "QCN_FIFO_RINT",
				&hclge_qcn_fifo_rint[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK;
	if (status)
		hclge_log_error(dev, "QCN_ECC_RINT",
				&hclge_qcn_ecc_rint[0], status,
				&ae_dev->hw_err_reset_req);

	/* log NCSI errors */
	desc_data = (__le32 *)&desc[9];
	status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK;
	if (status)
		hclge_log_error(dev, "NCSI_ECC_INT_RPT",
				&hclge_ncsi_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* clear all main PF RAS errors */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret);

	return ret;
}
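
/*
 * Both RAS handlers use a query-then-clear sequence: the same descriptor
 * set is first sent as a read, then reused via
 * hclge_cmd_reuse_desc(..., false) as a write to clear the interrupt
 * sources that were just logged.
 */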

/* hclge_handle_pf_ras_error: handle all PF RAS errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @num: number of extended command structures
 *
 * This function handles all the PF RAS errors in the
 * hw registers using the command queue.
 */
static int hclge_handle_pf_ras_error(struct hclge_dev *hdev,
				     struct hclge_desc *desc,
				     int num)
{
	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all PF RAS errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret) {
		dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log SSU(Storage Switch Unit) errors */
	status = le32_to_cpu(desc[0].data[0]);
	if (status)
		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
				&hclge_ssu_port_based_err_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[1]);
	if (status)
		hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT",
				&hclge_ssu_fifo_overflow_int[0], status,
				&ae_dev->hw_err_reset_req);

	status = le32_to_cpu(desc[0].data[2]);
	if (status)
		hclge_log_error(dev, "SSU_ETS_TCG_INT",
				&hclge_ssu_ets_tcg_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */
	desc_data = (__le32 *)&desc[1];
	status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK;
	if (status)
		hclge_log_error(dev, "IGU_EGU_TNL_INT_STS",
				&hclge_igu_egu_tnl_int[0], status,
				&ae_dev->hw_err_reset_req);

	/* log PPU(RCB) errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK;
	if (status) {
		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0",
				&hclge_ppu_pf_abnormal_int[0], status,
				&ae_dev->hw_err_reset_req);
		hclge_report_hw_error(hdev, HNAE3_PPU_POISON_ERROR);
	}

	/* clear all PF RAS errors */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret);

	return ret;
}

static int hclge_handle_all_ras_errors(struct hclge_dev *hdev)
{
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	int ret;

	/* query the number of registers in the RAS int status */
	ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num);
	if (ret)
		return ret;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	/* handle all main PF RAS errors */
	ret = hclge_handle_mpf_ras_error(hdev, desc, mpf_bd_num);
	if (ret) {
		kfree(desc);
		return ret;
	}
	memset(desc, 0, bd_num * sizeof(struct hclge_desc));

	/* handle all PF RAS errors */
	ret = hclge_handle_pf_ras_error(hdev, desc, pf_bd_num);
	kfree(desc);

	return ret;
}

static int hclge_log_rocee_axi_error(struct hclge_dev *hdev)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[3];
	int ret;

	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
				   true);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
				   true);
	hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD,
				   true);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 3);
	if (ret) {
		dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret);
		return ret;
	}

	dev_err(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n",
		le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
		le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
		le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
	dev_err(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n",
		le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]),
		le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]),
		le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5]));
	dev_err(dev, "AXI3: %08X %08X %08X %08X\n",
		le32_to_cpu(desc[2].data[0]), le32_to_cpu(desc[2].data[1]),
		le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3]));

	return 0;
}

static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	ret = hclge_cmd_query_error(hdev, &desc[0],
				    HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD,
				    HCLGE_CMD_FLAG_NEXT);
	if (ret) {
		dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret);
		return ret;
	}

	dev_err(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n",
		le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]),
		le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]),
		le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5]));
	dev_err(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]),
		le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2]));

	return 0;
}

static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	/* read overflow error status */
	ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD,
				    0);
	if (ret) {
		dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret);
		return ret;
	}

	/* log overflow error */
	if (le32_to_cpu(desc[0].data[0]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
		const struct hclge_hw_error *err;
		u32 err_sts;

		err = &hclge_rocee_qmm_ovf_err_int[0];
		err_sts = HCLGE_ROCEE_OVF_ERR_TYPE_MASK &
			  le32_to_cpu(desc[0].data[0]);
		while (err->msg) {
			if (err->int_msk == err_sts) {
				dev_err(dev, "%s [error status=0x%x] found\n",
					err->msg,
					le32_to_cpu(desc[0].data[0]));
				break;
			}
			err++;
		}
	}

	if (le32_to_cpu(desc[0].data[1]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
		dev_err(dev, "ROCEE TSP OVF [error status=0x%x] found\n",
			le32_to_cpu(desc[0].data[1]));
	}

	if (le32_to_cpu(desc[0].data[2]) & HCLGE_ROCEE_OVF_ERR_INT_MASK) {
		dev_err(dev, "ROCEE SCC OVF [error status=0x%x] found\n",
			le32_to_cpu(desc[0].data[2]));
	}

	return 0;
}
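
/*
 * Note the escalation order below: an AXI response error requests a
 * function reset, but a later 2-bit ECC error overwrites that with a
 * global reset, and any failure while logging or clearing the status
 * also falls back to a global reset.
 */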

static enum hnae3_reset_type
hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev)
{
	enum hnae3_reset_type reset_type = HNAE3_NONE_RESET;
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	unsigned int status;
	int ret;

	/* read RAS error interrupt status */
	ret = hclge_cmd_query_error(hdev, &desc[0],
				    HCLGE_QUERY_CLEAR_ROCEE_RAS_INT, 0);
	if (ret) {
		dev_err(dev, "failed(%d) to query ROCEE RAS INT SRC\n", ret);
		/* reset everything for now */
		return HNAE3_GLOBAL_RESET;
	}

	status = le32_to_cpu(desc[0].data[0]);

	if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) {
		if (status & HCLGE_ROCEE_RERR_INT_MASK)
			dev_err(dev, "ROCEE RAS AXI rresp error\n");

		if (status & HCLGE_ROCEE_BERR_INT_MASK)
			dev_err(dev, "ROCEE RAS AXI bresp error\n");

		reset_type = HNAE3_FUNC_RESET;

		hclge_report_hw_error(hdev, HNAE3_ROCEE_AXI_RESP_ERROR);

		ret = hclge_log_rocee_axi_error(hdev);
		if (ret)
			return HNAE3_GLOBAL_RESET;
	}

	if (status & HCLGE_ROCEE_ECC_INT_MASK) {
		dev_err(dev, "ROCEE RAS 2bit ECC error\n");
		reset_type = HNAE3_GLOBAL_RESET;

		ret = hclge_log_rocee_ecc_error(hdev);
		if (ret)
			return HNAE3_GLOBAL_RESET;
	}

	if (status & HCLGE_ROCEE_OVF_INT_MASK) {
		ret = hclge_log_rocee_ovf_error(hdev);
		if (ret) {
			dev_err(dev, "failed(%d) to process ovf error\n", ret);
			/* reset everything for now */
			return HNAE3_GLOBAL_RESET;
		}
	}

	/* clear error status */
	hclge_cmd_reuse_desc(&desc[0], false);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
	if (ret) {
		dev_err(dev, "failed(%d) to clear ROCEE RAS error\n", ret);
		/* reset everything for now */
		return HNAE3_GLOBAL_RESET;
	}

	return reset_type;
}

int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc;
	int ret;

	if (hdev->ae_dev->dev_version < HNAE3_DEVICE_VERSION_V2 ||
	    !hnae3_dev_roce_supported(hdev))
		return 0;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_CONFIG_ROCEE_RAS_INT_EN, false);
	if (en) {
		/* enable ROCEE hw error interrupts */
		desc.data[0] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN);
		desc.data[1] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN);

		hclge_log_and_clear_rocee_ras_error(hdev);
	}
	desc.data[2] = cpu_to_le32(HCLGE_ROCEE_RAS_NFE_INT_EN_MASK);
	desc.data[3] = cpu_to_le32(HCLGE_ROCEE_RAS_CE_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		dev_err(dev, "failed(%d) to config ROCEE RAS interrupt\n", ret);

	return ret;
}

static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	enum hnae3_reset_type reset_type;

	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
		return;

	reset_type = hclge_log_and_clear_rocee_ras_error(hdev);
	if (reset_type != HNAE3_NONE_RESET)
		set_bit(reset_type, &ae_dev->hw_err_reset_req);
}

static const struct hclge_hw_blk hw_blk[] = {
	{
		.msk = BIT(0), .name = "IGU_EGU",
		.config_err_int = hclge_config_igu_egu_hw_err_int,
	},
	{
		.msk = BIT(1), .name = "PPP",
		.config_err_int = hclge_config_ppp_hw_err_int,
	},
	{
		.msk = BIT(2), .name = "SSU",
		.config_err_int = hclge_config_ssu_hw_err_int,
	},
	{
		.msk = BIT(3), .name = "PPU",
		.config_err_int = hclge_config_ppu_hw_err_int,
	},
	{
		.msk = BIT(4), .name = "TM",
		.config_err_int = hclge_config_tm_hw_err_int,
	},
	{
		.msk = BIT(5), .name = "COMMON",
		.config_err_int = hclge_config_common_hw_err_int,
	},
	{
		.msk = BIT(8), .name = "MAC",
		.config_err_int = hclge_config_mac_err_int,
	},
	{ /* sentinel */ }
};
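
/*
 * hclge_config_nic_hw_error() walks the sentinel-terminated hw_blk
 * table above and enables (state == true) or disables every block's
 * error interrupts, stopping at the first failure.
 */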
	const struct hclge_hw_blk *module = hw_blk;
	int ret = 0;

	while (module->name) {
		if (module->config_err_int) {
			ret = module->config_err_int(hdev, state);
			if (ret)
				return ret;
		}
		module++;
	}

	return ret;
}

pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 status;

	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
		dev_err(dev,
			"Can't recover - RAS error reported during dev init\n");
		return PCI_ERS_RESULT_NONE;
	}

	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);

	if (status & HCLGE_RAS_REG_NFE_MASK ||
	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
		ae_dev->hw_err_reset_req = 0;
	else
		goto out;

	/* Handle Non-fatal HNS RAS errors */
	if (status & HCLGE_RAS_REG_NFE_MASK) {
		dev_err(dev,
			"HNS Non-Fatal RAS error(status=0x%x) identified\n",
			status);
		hclge_handle_all_ras_errors(hdev);
	}

	/* Handle Non-fatal ROCEE RAS errors */
	if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2 &&
	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
		dev_err(dev, "ROCEE Non-Fatal RAS error identified\n");
		hclge_handle_rocee_ras_error(ae_dev);
	}

	if (ae_dev->hw_err_reset_req)
		return PCI_ERS_RESULT_NEED_RESET;

out:
	return PCI_ERS_RESULT_RECOVERED;
}

static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
				     struct hclge_desc *desc, bool is_mpf,
				     u32 bd_num)
{
	if (is_mpf)
		desc[0].opcode =
			cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT);
	else
		desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT);

	desc[0].flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);

	return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
}

/* hclge_query_over_8bd_err_info: query information about over_8bd_nfe_err
 * @hdev: pointer to struct hclge_dev
 * @vf_id: Index of the virtual function with error
 * @q_id: Physical index of the queue with error
 *
 * This function gets the specific index of the queue and function which
 * caused over_8bd_nfe_err by using command. If vf_id is 0, it means the
 * error is caused by the PF instead of a VF.
 */
static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
					 u16 *q_id)
{
	struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
	struct hclge_desc desc;
	int ret;

	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
	if (ret)
		return ret;

	req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
	*vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
	*q_id = le16_to_cpu(req->over_8bd_no_fe_qid);

	return 0;
}

/* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err
 * @hdev: pointer to struct hclge_dev
 * @reset_requests: reset level that we need to trigger later
 *
 * over_8bd_nfe_err is a special MSI-X error because it may be caused by
 * a VF; in that case, we need to trigger a VF reset. Otherwise, a PF
 * reset is needed.
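 * The VF index reported by firmware is validated against
 * hdev->num_alloc_vport before any reset is forwarded to the VF.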
 */
static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
				      unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	u16 vf_id;
	u16 q_id;
	int ret;

	ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
	if (ret) {
		dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n", ret);
		return;
	}

	dev_err(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%u), queue_id(%u)\n",
		vf_id, q_id);

	if (vf_id) {
		if (vf_id >= hdev->num_alloc_vport) {
			dev_err(dev, "invalid vf id(%u)\n", vf_id);
			return;
		}

		/* If we need to trigger another reset whose level is higher
		 * than HNAE3_VF_FUNC_RESET, there is no need to trigger a
		 * VF reset here.
		 */
		if (*reset_requests != 0)
			return;

		ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
		if (ret)
			dev_err(dev, "inform reset to vf(%u) failed %d!\n",
				hdev->vport[vf_id].vport_id, ret);
	} else {
		set_bit(HNAE3_FUNC_RESET, reset_requests);
	}
}

/* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @mpf_bd_num: number of extended command structures
 * @reset_requests: record of the reset level that we need
 *
 * This function handles all the main PF MSI-X errors in the hw registers
 * using command.
 */
static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
				       struct hclge_desc *desc,
				       int mpf_bd_num,
				       unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all main PF MSIx errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
	if (ret) {
		dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log MAC errors */
	desc_data = (__le32 *)&desc[1];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
				&hclge_mac_afifo_tnl_int[0], status,
				reset_requests);

	/* log PPU(RCB) MPF errors */
	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 2)) &
		 HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
	if (status)
		dev_err(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x]\n",
			status);

	/* clear all main PF MSIx errors */
	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
	if (ret)
		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);

	return ret;
}

/* hclge_handle_pf_msix_error: handle all PF MSI-X errors
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @pf_bd_num: number of extended command structures
 * @reset_requests: record of the reset level that we need
 *
 * This function handles all the PF MSI-X errors in the hw registers using
 * command.
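 * The SSU, PPP and PPU(RCB) status words are parsed from fixed offsets
 * in the returned descriptors before the errors are cleared.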
 */
static int hclge_handle_pf_msix_error(struct hclge_dev *hdev,
				      struct hclge_desc *desc,
				      int pf_bd_num,
				      unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;
	__le32 *desc_data;
	u32 status;
	int ret;

	/* query all PF MSIx errors */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num);
	if (ret) {
		dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret);
		return ret;
	}

	/* log SSU PF errors */
	status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK;
	if (status)
		hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT",
				&hclge_ssu_port_based_pf_int[0],
				status, reset_requests);

	/* read and log PPP PF errors */
	desc_data = (__le32 *)&desc[2];
	status = le32_to_cpu(*desc_data);
	if (status)
		hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0",
				&hclge_ppp_pf_abnormal_int[0],
				status, reset_requests);

	/* log PPU(RCB) PF errors */
	desc_data = (__le32 *)&desc[3];
	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK;
	if (status)
		hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST",
				&hclge_ppu_pf_abnormal_int[0],
				status, reset_requests);

	status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK;
	if (status)
		hclge_handle_over_8bd_err(hdev, reset_requests);

	/* clear all PF MSIx errors */
	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
	if (ret)
		dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret);

	return ret;
}

static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev,
					  unsigned long *reset_requests)
{
	struct hclge_mac_tnl_stats mac_tnl_stats;
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	u32 status;
	int ret;

	/* query the number of bds for the MSIx int status */
	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
	if (ret)
		goto out;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return -ENOMEM;

	ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num,
					  reset_requests);
	if (ret)
		goto msi_error;

	memset(desc, 0, bd_num * sizeof(struct hclge_desc));
	ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests);
	if (ret)
		goto msi_error;

	/* query and clear mac tnl interrupts */
	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT,
				   true);
	ret = hclge_cmd_send(&hdev->hw, &desc[0], 1);
	if (ret) {
		dev_err(dev, "query mac tnl int cmd failed (%d)\n", ret);
		goto msi_error;
	}

	status = le32_to_cpu(desc->data[0]);
	if (status) {
		/* When a mac tnl interrupt occurs, we record the current
		 * time and the register status in a fifo, then clear the
		 * status, so that if the link status changes suddenly at
		 * some later time, they can be queried through debugfs.
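		 * local_clock() is used for the timestamp, giving a
		 * nanosecond-resolution time since boot.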
		 */
		mac_tnl_stats.time = local_clock();
		mac_tnl_stats.status = status;
		kfifo_put(&hdev->mac_tnl_log, mac_tnl_stats);
		ret = hclge_clear_mac_tnl_int(hdev);
		if (ret)
			dev_err(dev, "clear mac tnl int failed (%d)\n", ret);
	}

msi_error:
	kfree(desc);
out:
	return ret;
}

int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
			       unsigned long *reset_requests)
{
	struct device *dev = &hdev->pdev->dev;

	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
		dev_err(dev,
			"Can't handle - MSIx error reported during dev init\n");
		return 0;
	}

	return hclge_handle_all_hw_msix_error(hdev, reset_requests);
}

void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev)
{
#define HCLGE_DESC_NO_DATA_LEN 8

	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 mpf_bd_num, pf_bd_num, bd_num;
	struct hclge_desc *desc;
	u32 status;
	int ret;

	ae_dev->hw_err_reset_req = 0;
	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);

	/* query the number of bds for the MSIx int status */
	ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num);
	if (ret)
		return;

	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
	if (!desc)
		return;

	/* Clear HNS hw errors reported through msix */
	memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) -
	       HCLGE_DESC_NO_DATA_LEN);
	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
	if (ret) {
		dev_err(dev, "fail(%d) to clear mpf msix int during init\n",
			ret);
		goto msi_error;
	}

	memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) -
	       HCLGE_DESC_NO_DATA_LEN);
	ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num);
	if (ret) {
		dev_err(dev, "fail(%d) to clear pf msix int during init\n",
			ret);
		goto msi_error;
	}

	/* Handle Non-fatal HNS RAS errors */
	if (status & HCLGE_RAS_REG_NFE_MASK) {
		dev_err(dev, "HNS hw error(RAS) identified during init\n");
		hclge_handle_all_ras_errors(hdev);
	}

msi_error:
	kfree(desc);
}