1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 16 #include "ena_hw.h" 17 #include "ena.h" 18 19 CTASSERT(sizeof (enahw_aenq_desc_t) == 64); 20 21 /* 22 * We add this here as an extra safety check to make sure that any 23 * addition to the AENQ group enum also updates the groups array num 24 * value. 25 */ 26 CTASSERT(ENAHW_AENQ_GROUPS_ARR_NUM == 8); 27 28 typedef struct ena_aenq_grpstr { 29 enahw_aenq_groups_t eag_type; 30 const char *eag_str; 31 } ena_aenq_grpstr_t; 32 33 static ena_aenq_grpstr_t ena_groups_str[ENAHW_AENQ_GROUPS_ARR_NUM] = { 34 { 35 .eag_type = ENAHW_AENQ_GROUP_LINK_CHANGE, 36 .eag_str = "LINK CHANGE" 37 }, 38 { 39 .eag_type = ENAHW_AENQ_GROUP_FATAL_ERROR, 40 .eag_str = "FATAL ERROR" 41 }, 42 { 43 .eag_type = ENAHW_AENQ_GROUP_WARNING, 44 .eag_str = "WARNING" 45 }, 46 { 47 .eag_type = ENAHW_AENQ_GROUP_NOTIFICATION, 48 .eag_str = "NOTIFICATION" 49 }, 50 { 51 .eag_type = ENAHW_AENQ_GROUP_KEEP_ALIVE, 52 .eag_str = "KEEP ALIVE" 53 }, 54 { 55 .eag_type = ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES, 56 .eag_str = "REFRESH CAPABILITIES" 57 }, 58 { 59 .eag_type = ENAHW_AENQ_GROUP_CONF_NOTIFICATIONS, 60 .eag_str = "CONFIG NOTIFICATIONS" 61 }, 62 { 63 .eag_type = ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET, 64 .eag_str = "DEVICE RESET REQUEST" 65 } 66 }; 67 68 bool 69 ena_aenq_configure(ena_t *ena) 70 { 71 enahw_cmd_desc_t cmd; 72 enahw_feat_aenq_t *cmd_feat = 73 &cmd.ecd_cmd.ecd_set_feat.ecsf_feat.ecsf_aenq; 74 enahw_resp_desc_t resp; 75 enahw_feat_aenq_t *resp_feat = &resp.erd_resp.erd_get_feat.ergf_aenq; 76 enahw_aenq_groups_t to_enable; 77 78 bzero(&resp, sizeof (resp)); 79 if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG, 80 ENAHW_FEAT_AENQ_CONFIG_VER) != 0) { 81 return (false); 82 } 83 84 to_enable = BIT(ENAHW_AENQ_GROUP_LINK_CHANGE) | 85 BIT(ENAHW_AENQ_GROUP_FATAL_ERROR) | 86 BIT(ENAHW_AENQ_GROUP_WARNING) | 87 BIT(ENAHW_AENQ_GROUP_NOTIFICATION) | 88 BIT(ENAHW_AENQ_GROUP_KEEP_ALIVE) | 89 BIT(ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET); 90 to_enable &= resp_feat->efa_supported_groups; 91 92 bzero(&cmd, sizeof (cmd)); 93 bzero(&resp, sizeof (cmd)); 94 cmd_feat->efa_enabled_groups = to_enable; 95 96 if (ena_set_feature(ena, &cmd, &resp, ENAHW_FEAT_AENQ_CONFIG, 97 ENAHW_FEAT_AENQ_CONFIG_VER) != 0) { 98 return (false); 99 } 100 101 bzero(&resp, sizeof (resp)); 102 if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG, 103 ENAHW_FEAT_AENQ_CONFIG_VER) != 0) { 104 return (false); 105 } 106 107 ena->ena_aenq_supported_groups = resp_feat->efa_supported_groups; 108 ena->ena_aenq_enabled_groups = resp_feat->efa_enabled_groups; 109 110 for (uint_t i = 0; i < ENAHW_AENQ_GROUPS_ARR_NUM; i++) { 111 ena_aenq_grpstr_t *grpstr = &ena_groups_str[i]; 112 bool supported = BIT(grpstr->eag_type) & 113 resp_feat->efa_supported_groups; 114 bool enabled = BIT(grpstr->eag_type) & 115 resp_feat->efa_enabled_groups; 116 117 ena_dbg(ena, "%s supported: %s enabled: %s", grpstr->eag_str, 118 supported ? "Y" : "N", enabled ? "Y" : "N"); 119 } 120 121 return (true); 122 } 123 124 void 125 ena_aenq_work(ena_t *ena) 126 { 127 ena_aenq_t *aenq = &ena->ena_aenq; 128 uint16_t head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1); 129 bool processed = false; 130 enahw_aenq_desc_t *desc = &aenq->eaenq_descs[head_mod]; 131 132 ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORKERNEL); 133 134 while (ENAHW_AENQ_DESC_PHASE(desc) == aenq->eaenq_phase) { 135 ena_aenq_hdlr_t hdlr; 136 137 ASSERT3U(desc->ead_group, <, ENAHW_AENQ_GROUPS_ARR_NUM); 138 processed = true; 139 140 /* 141 * Keepalives occur once per second, we won't issue a debug 142 * log message for each of those. 143 */ 144 if (ena_debug && 145 desc->ead_group != ENAHW_AENQ_GROUP_KEEP_ALIVE) { 146 uint64_t ts = ((uint64_t)desc->ead_ts_high << 32) | 147 (uint64_t)desc->ead_ts_low; 148 149 ena_dbg(ena, 150 "AENQ Group: (0x%x) %s Syndrome: 0x%x ts: %" PRIu64 151 " us", desc->ead_group, 152 ena_groups_str[desc->ead_group].eag_str, 153 desc->ead_syndrome, ts); 154 } 155 156 hdlr = ena->ena_aenq.eaenq_hdlrs[desc->ead_group]; 157 hdlr(ena, desc); 158 159 aenq->eaenq_head++; 160 head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1); 161 162 if (head_mod == 0) 163 aenq->eaenq_phase ^= 1; 164 165 desc = &aenq->eaenq_descs[head_mod]; 166 } 167 168 if (processed) { 169 ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB, 170 aenq->eaenq_head); 171 } 172 } 173 174 static void 175 ena_aenq_link_change_hdlr(void *data, enahw_aenq_desc_t *desc) 176 { 177 ena_t *ena = data; 178 bool is_up = (desc->ead_payload.link_change.flags & 179 ENAHW_AENQ_LINK_CHANGE_LINK_STATUS_MASK) != 0; 180 link_state_t new_state = is_up ? LINK_STATE_UP : LINK_STATE_DOWN; 181 182 /* 183 * The interrupts are not enabled until after we register mac, 184 * so the mac handle should be valid. 185 */ 186 ASSERT3U(ena->ena_attach_seq, >=, ENA_ATTACH_MAC_REGISTER); 187 ena->ena_aenq_stat.eaes_link_change.value.ui64++; 188 189 ena_dbg(ena, "link is %s", is_up ? "UP" : "DOWN"); 190 191 mutex_enter(&ena->ena_lock); 192 193 /* 194 * Notify mac only on an actual change in status. 195 */ 196 if (ena->ena_link_state != new_state) { 197 mac_link_update(ena->ena_mh, new_state); 198 ena->ena_link_state = new_state; 199 } 200 201 mutex_exit(&ena->ena_lock); 202 } 203 204 static void 205 ena_aenq_notification_hdlr(void *data, enahw_aenq_desc_t *desc) 206 { 207 ena_t *ena = data; 208 209 if (desc->ead_syndrome == ENAHW_AENQ_SYNDROME_UPDATE_HINTS) { 210 enahw_device_hints_t *hints = 211 (enahw_device_hints_t *)desc->ead_payload.raw; 212 213 ena_update_hints(ena, hints); 214 } else { 215 ena_err(ena, "Invalid aenq notification syndrome 0x%x", 216 desc->ead_syndrome); 217 } 218 219 ena->ena_aenq_stat.eaes_notification.value.ui64++; 220 } 221 222 static void 223 ena_aenq_keep_alive_hdlr(void *data, enahw_aenq_desc_t *desc) 224 { 225 ena_t *ena = data; 226 uint64_t rx_drops, tx_drops, rx_overruns; 227 ena_basic_stat_t *ebs = ena->ena_device_basic_kstat->ks_data; 228 uint64_t now = (uint64_t)gethrtime(); 229 230 (void) atomic_swap_64(&ena->ena_watchdog_last_keepalive, now); 231 232 rx_drops = 233 ((uint64_t)desc->ead_payload.keep_alive.rx_drops_high << 32) | 234 desc->ead_payload.keep_alive.rx_drops_low; 235 tx_drops = 236 ((uint64_t)desc->ead_payload.keep_alive.tx_drops_high << 32) | 237 desc->ead_payload.keep_alive.tx_drops_low; 238 rx_overruns = 239 ((uint64_t)desc->ead_payload.keep_alive.rx_overruns_high << 32) | 240 desc->ead_payload.keep_alive.rx_overruns_low; 241 242 mutex_enter(&ena->ena_device_basic_stat_lock); 243 ebs->ebs_rx_drops.value.ui64 = rx_drops; 244 ebs->ebs_tx_drops.value.ui64 = tx_drops; 245 ebs->ebs_rx_overruns.value.ui64 = rx_overruns; 246 mutex_exit(&ena->ena_device_basic_stat_lock); 247 248 ena->ena_aenq_stat.eaes_keep_alive.value.ui64++; 249 } 250 251 static void 252 ena_aenq_request_reset_hdlr(void *data, enahw_aenq_desc_t *desc) 253 { 254 ena_t *ena = data; 255 256 ena->ena_reset_reason = ENAHW_RESET_DEVICE_REQUEST; 257 atomic_or_32(&ena->ena_state, ENA_STATE_ERROR); 258 259 ena->ena_aenq_stat.eaes_request_reset.value.ui64++; 260 } 261 262 static void 263 ena_aenq_fatal_error_hdlr(void *data, enahw_aenq_desc_t *desc) 264 { 265 ena_t *ena = data; 266 267 /* 268 * The other open source drivers register this event but don't do 269 * anything when it triggers. We do the same for now. If this indicates 270 * that the fatal error bit has been set in the status register, the 271 * watchdog will pick that up directly and issue a reset. 272 */ 273 ena->ena_aenq_stat.eaes_fatal_error.value.ui64++; 274 } 275 276 static void 277 ena_aenq_warning_hdlr(void *data, enahw_aenq_desc_t *desc) 278 { 279 ena_t *ena = data; 280 281 /* 282 * The other open source drivers register this event but don't do 283 * anything when it triggers. We do the same for now. 284 */ 285 ena->ena_aenq_stat.eaes_warning.value.ui64++; 286 } 287 288 static void 289 ena_aenq_default_hdlr(void *data, enahw_aenq_desc_t *desc) 290 { 291 ena_t *ena = data; 292 293 ena->ena_aenq_stat.eaes_default.value.ui64++; 294 /* 295 * We don't enable any of the groups that we don't support, so this 296 * should not happen. 297 */ 298 ena_dbg(ena, "unimplemented handler for aenq group: %s", 299 ena_groups_str[desc->ead_group].eag_str); 300 } 301 302 static void 303 ena_aenq_set_hdlrs(ena_aenq_t *aenq) 304 { 305 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_LINK_CHANGE] = 306 ena_aenq_link_change_hdlr; 307 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_NOTIFICATION] = 308 ena_aenq_notification_hdlr; 309 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_KEEP_ALIVE] = 310 ena_aenq_keep_alive_hdlr; 311 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET] = 312 ena_aenq_request_reset_hdlr; 313 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_FATAL_ERROR] = 314 ena_aenq_fatal_error_hdlr; 315 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_WARNING] = 316 ena_aenq_warning_hdlr; 317 318 /* The following events are not handled */ 319 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES] = 320 ena_aenq_default_hdlr; 321 aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_CONF_NOTIFICATIONS] = 322 ena_aenq_default_hdlr; 323 } 324 325 bool 326 ena_aenq_init(ena_t *ena) 327 { 328 ena_aenq_t *aenq = &ena->ena_aenq; 329 uint32_t addr_low, addr_high, wval; 330 331 if (aenq->eaenq_descs == NULL) { 332 size_t size; 333 334 aenq->eaenq_num_descs = ENA_AENQ_NUM_DESCS; 335 size = aenq->eaenq_num_descs * sizeof (*aenq->eaenq_descs); 336 337 ena_dma_conf_t conf = { 338 .edc_size = size, 339 .edc_align = ENAHW_AENQ_DESC_BUF_ALIGNMENT, 340 .edc_sgl = 1, 341 .edc_endian = DDI_NEVERSWAP_ACC, 342 .edc_stream = false, 343 }; 344 345 if (!ena_dma_alloc(ena, &aenq->eaenq_dma, &conf, size)) { 346 ena_err(ena, "failed to allocate DMA for AENQ"); 347 return (false); 348 } 349 350 ENA_DMA_VERIFY_ADDR(ena, 351 aenq->eaenq_dma.edb_cookie->dmac_laddress); 352 aenq->eaenq_descs = (void *)aenq->eaenq_dma.edb_va; 353 ena_aenq_set_hdlrs(aenq); 354 } else { 355 ena_dma_bzero(&aenq->eaenq_dma); 356 } 357 358 aenq->eaenq_head = aenq->eaenq_num_descs; 359 aenq->eaenq_phase = 1; 360 361 addr_low = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress); 362 addr_high = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress >> 32); 363 ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_LO, addr_low); 364 ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_HI, addr_high); 365 ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORDEV); 366 wval = ENAHW_AENQ_CAPS_DEPTH(aenq->eaenq_num_descs) | 367 ENAHW_AENQ_CAPS_ENTRY_SIZE(sizeof (*aenq->eaenq_descs)); 368 ena_hw_bar_write32(ena, ENAHW_REG_AENQ_CAPS, wval); 369 370 return (true); 371 } 372 373 void 374 ena_aenq_enable(ena_t *ena) 375 { 376 /* 377 * We set this to zero here so that the watchdog will ignore it until 378 * the first keepalive event is received. Devices that do not support 379 * sending keepalives will result in this value remaining at 0. 380 */ 381 ena->ena_watchdog_last_keepalive = 0; 382 ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB, 383 ena->ena_aenq.eaenq_head); 384 } 385 386 void 387 ena_aenq_free(ena_t *ena) 388 { 389 ena_dma_free(&ena->ena_aenq.eaenq_dma); 390 } 391