/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

#include "ena_hw.h"
#include "ena.h"

CTASSERT(sizeof (enahw_aenq_desc_t) == 64);

/*
 * This is an extra safety check to ensure that any addition to the
 * AENQ group enum is also reflected in the groups array count.
 */
CTASSERT(ENAHW_AENQ_GROUPS_ARR_NUM == 8);

typedef struct ena_aenq_grpstr {
	enahw_aenq_groups_t eag_type;
	const char *eag_str;
} ena_aenq_grpstr_t;

static ena_aenq_grpstr_t ena_groups_str[ENAHW_AENQ_GROUPS_ARR_NUM] = {
	{
		.eag_type = ENAHW_AENQ_GROUP_LINK_CHANGE,
		.eag_str = "LINK CHANGE"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_FATAL_ERROR,
		.eag_str = "FATAL ERROR"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_WARNING,
		.eag_str = "WARNING"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_NOTIFICATION,
		.eag_str = "NOTIFICATION"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_KEEP_ALIVE,
		.eag_str = "KEEP ALIVE"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES,
		.eag_str = "REFRESH CAPABILITIES"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_CONF_NOTIFICATIONS,
		.eag_str = "CONFIG NOTIFICATIONS"
	},
	{
		.eag_type = ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET,
		.eag_str = "DEVICE RESET REQUEST"
	}
};

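/*
 * Negotiate the set of AENQ events that the device will deliver: read
 * the groups the device supports, enable the intersection of those
 * with the groups this driver handles, and then read the feature back
 * to record and log what is actually supported and enabled.
 */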
bool
ena_aenq_configure(ena_t *ena)
{
	enahw_cmd_desc_t cmd;
	enahw_feat_aenq_t *cmd_feat =
	    &cmd.ecd_cmd.ecd_set_feat.ecsf_feat.ecsf_aenq;
	enahw_resp_desc_t resp;
	enahw_feat_aenq_t *resp_feat = &resp.erd_resp.erd_get_feat.ergf_aenq;
	enahw_aenq_groups_t to_enable;

	bzero(&resp, sizeof (resp));
	if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG,
	    ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
		return (false);
	}

	to_enable = BIT(ENAHW_AENQ_GROUP_LINK_CHANGE) |
	    BIT(ENAHW_AENQ_GROUP_FATAL_ERROR) |
	    BIT(ENAHW_AENQ_GROUP_WARNING) |
	    BIT(ENAHW_AENQ_GROUP_NOTIFICATION) |
	    BIT(ENAHW_AENQ_GROUP_KEEP_ALIVE) |
	    BIT(ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET);
	to_enable &= resp_feat->efa_supported_groups;

	bzero(&cmd, sizeof (cmd));
	bzero(&resp, sizeof (resp));
	cmd_feat->efa_enabled_groups = to_enable;

	if (ena_set_feature(ena, &cmd, &resp, ENAHW_FEAT_AENQ_CONFIG,
	    ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
		return (false);
	}

	bzero(&resp, sizeof (resp));
	if (ena_get_feature(ena, &resp, ENAHW_FEAT_AENQ_CONFIG,
	    ENAHW_FEAT_AENQ_CONFIG_VER) != 0) {
		return (false);
	}

	ena->ena_aenq_supported_groups = resp_feat->efa_supported_groups;
	ena->ena_aenq_enabled_groups = resp_feat->efa_enabled_groups;

	for (uint_t i = 0; i < ENAHW_AENQ_GROUPS_ARR_NUM; i++) {
		ena_aenq_grpstr_t *grpstr = &ena_groups_str[i];
		bool supported = BIT(grpstr->eag_type) &
		    resp_feat->efa_supported_groups;
		bool enabled = BIT(grpstr->eag_type) &
		    resp_feat->efa_enabled_groups;

		ena_dbg(ena, "%s supported: %s enabled: %s", grpstr->eag_str,
		    supported ? "Y" : "N", enabled ? "Y" : "N");
	}

	return (true);
}

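/*
 * Process any pending AENQ descriptors. New entries are detected by
 * comparing a descriptor's phase bit against our expected phase, which
 * flips each time the ring wraps. After dispatching each event to its
 * group handler, we write the new head back to the doorbell register
 * so that the device may reuse the consumed descriptors.
 */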
void
ena_aenq_work(ena_t *ena)
{
	ena_aenq_t *aenq = &ena->ena_aenq;
	uint16_t head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1);
	bool processed = false;
	enahw_aenq_desc_t *desc = &aenq->eaenq_descs[head_mod];

	ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORKERNEL);

	while (ENAHW_AENQ_DESC_PHASE(desc) == aenq->eaenq_phase) {
		ena_aenq_hdlr_t hdlr;

		ASSERT3U(desc->ead_group, <, ENAHW_AENQ_GROUPS_ARR_NUM);
		processed = true;

		/*
		 * Keepalives occur once per second; we don't issue a
		 * debug log message for each of those.
		 */
		if (ena_debug &&
		    desc->ead_group != ENAHW_AENQ_GROUP_KEEP_ALIVE) {
			uint64_t ts = ((uint64_t)desc->ead_ts_high << 32) |
			    (uint64_t)desc->ead_ts_low;

			ena_dbg(ena,
			    "AENQ Group: (0x%x) %s Syndrome: 0x%x ts: %" PRIu64
			    " us", desc->ead_group,
			    ena_groups_str[desc->ead_group].eag_str,
			    desc->ead_syndrome, ts);
		}

		hdlr = ena->ena_aenq.eaenq_hdlrs[desc->ead_group];
		hdlr(ena, desc);

		aenq->eaenq_head++;
		head_mod = aenq->eaenq_head & (aenq->eaenq_num_descs - 1);

		if (head_mod == 0)
			aenq->eaenq_phase ^= 1;

		desc = &aenq->eaenq_descs[head_mod];
	}

	if (processed) {
		ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB,
		    aenq->eaenq_head);
	}
}

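/*
 * Handle a link state change event, passing the new state on to mac
 * when it differs from what we last reported.
 */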
static void
ena_aenq_link_change_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;
	bool is_up = (desc->ead_payload.link_change.flags &
	    ENAHW_AENQ_LINK_CHANGE_LINK_STATUS_MASK) != 0;
	link_state_t new_state = is_up ? LINK_STATE_UP : LINK_STATE_DOWN;

	/*
	 * The interrupts are not enabled until after we register mac,
	 * so the mac handle should be valid.
	 */
	ASSERT3U(ena->ena_attach_seq, >=, ENA_ATTACH_MAC_REGISTER);
	ena->ena_aenq_stat.eaes_link_change.value.ui64++;

	ena_dbg(ena, "link is %s", is_up ? "UP" : "DOWN");

	mutex_enter(&ena->ena_lock);

	/*
	 * Notify mac only on an actual change in status.
	 */
	if (ena->ena_link_state != new_state) {
		mac_link_update(ena->ena_mh, new_state);
		ena->ena_link_state = new_state;
	}

	mutex_exit(&ena->ena_lock);
}

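/*
 * Notification events carry a syndrome identifying the payload; the
 * only one we expect from the device is an update to its hints.
 */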
static void
ena_aenq_notification_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	if (desc->ead_syndrome == ENAHW_AENQ_SYNDROME_UPDATE_HINTS) {
		enahw_device_hints_t *hints =
		    (enahw_device_hints_t *)desc->ead_payload.raw;

		ena_update_hints(ena, hints);
	} else {
		ena_err(ena, "Invalid aenq notification syndrome 0x%x",
		    desc->ead_syndrome);
	}

	ena->ena_aenq_stat.eaes_notification.value.ui64++;
}

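/*
 * Keepalive events both reset the watchdog timer and carry a small set
 * of basic device statistics, each split across two 32-bit fields.
 * Reassemble the counters and publish them via the device's basic
 * kstat.
 */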
static void
ena_aenq_keep_alive_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;
	uint64_t rx_drops, tx_drops, rx_overruns;
	ena_basic_stat_t *ebs = ena->ena_device_basic_kstat->ks_data;
	uint64_t now = (uint64_t)gethrtime();

	(void) atomic_swap_64(&ena->ena_watchdog_last_keepalive, now);

	rx_drops =
	    ((uint64_t)desc->ead_payload.keep_alive.rx_drops_high << 32) |
	    desc->ead_payload.keep_alive.rx_drops_low;
	tx_drops =
	    ((uint64_t)desc->ead_payload.keep_alive.tx_drops_high << 32) |
	    desc->ead_payload.keep_alive.tx_drops_low;
	rx_overruns =
	    ((uint64_t)desc->ead_payload.keep_alive.rx_overruns_high << 32) |
	    desc->ead_payload.keep_alive.rx_overruns_low;

	mutex_enter(&ena->ena_device_basic_stat_lock);
	ebs->ebs_rx_drops.value.ui64 = rx_drops;
	ebs->ebs_tx_drops.value.ui64 = tx_drops;
	ebs->ebs_rx_overruns.value.ui64 = rx_overruns;
	mutex_exit(&ena->ena_device_basic_stat_lock);

	ena->ena_aenq_stat.eaes_keep_alive.value.ui64++;
}

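/*
 * The device has asked to be reset. Record the reason and mark the
 * device as being in an error state; the watchdog will notice and
 * carry out the reset.
 */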
static void
ena_aenq_request_reset_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	ena->ena_reset_reason = ENAHW_RESET_DEVICE_REQUEST;
	atomic_or_32(&ena->ena_state, ENA_STATE_ERROR);

	ena->ena_aenq_stat.eaes_request_reset.value.ui64++;
}

static void
ena_aenq_fatal_error_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	/*
	 * The other open source drivers register this event but don't do
	 * anything when it triggers. We do the same for now. If this indicates
	 * that the fatal error bit has been set in the status register, the
	 * watchdog will pick that up directly and issue a reset.
	 */
	ena->ena_aenq_stat.eaes_fatal_error.value.ui64++;
}

static void
ena_aenq_warning_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	/*
	 * The other open source drivers register this event but don't do
	 * anything when it triggers. We do the same for now.
	 */
	ena->ena_aenq_stat.eaes_warning.value.ui64++;
}

static void
ena_aenq_default_hdlr(void *data, enahw_aenq_desc_t *desc)
{
	ena_t *ena = data;

	ena->ena_aenq_stat.eaes_default.value.ui64++;
	/*
	 * We don't enable any of the groups that we don't support, so this
	 * should not happen.
	 */
	ena_dbg(ena, "unimplemented handler for aenq group: %s",
	    ena_groups_str[desc->ead_group].eag_str);
}

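/*
 * Install a handler for every AENQ group so that ena_aenq_work() never
 * calls through a NULL pointer; groups that we do not enable are
 * routed to the default handler.
 */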
static void
ena_aenq_set_hdlrs(ena_aenq_t *aenq)
{
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_LINK_CHANGE] =
	    ena_aenq_link_change_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_NOTIFICATION] =
	    ena_aenq_notification_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_KEEP_ALIVE] =
	    ena_aenq_keep_alive_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_DEVICE_REQUEST_RESET] =
	    ena_aenq_request_reset_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_FATAL_ERROR] =
	    ena_aenq_fatal_error_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_WARNING] =
	    ena_aenq_warning_hdlr;

	/* The following events are not handled */
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_REFRESH_CAPABILITIES] =
	    ena_aenq_default_hdlr;
	aenq->eaenq_hdlrs[ENAHW_AENQ_GROUP_CONF_NOTIFICATIONS] =
	    ena_aenq_default_hdlr;
}

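/*
 * Set up the AENQ: allocate the descriptor ring on first call (and
 * simply re-zero it on subsequent calls, such as across a device
 * reset), reset the head and phase, and program the ring's base
 * address, depth and entry size into the device's registers.
 */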
bool
ena_aenq_init(ena_t *ena)
{
	ena_aenq_t *aenq = &ena->ena_aenq;
	uint32_t addr_low, addr_high, wval;

	if (aenq->eaenq_descs == NULL) {
		size_t size;

		aenq->eaenq_num_descs = ENA_AENQ_NUM_DESCS;
		size = aenq->eaenq_num_descs * sizeof (*aenq->eaenq_descs);

		ena_dma_conf_t conf = {
			.edc_size = size,
			.edc_align = ENAHW_AENQ_DESC_BUF_ALIGNMENT,
			.edc_sgl = 1,
			.edc_endian = DDI_NEVERSWAP_ACC,
			.edc_stream = false,
		};

		if (!ena_dma_alloc(ena, &aenq->eaenq_dma, &conf, size)) {
			ena_err(ena, "failed to allocate DMA for AENQ");
			return (false);
		}

		ENA_DMA_VERIFY_ADDR(ena,
		    aenq->eaenq_dma.edb_cookie->dmac_laddress);
		aenq->eaenq_descs = (void *)aenq->eaenq_dma.edb_va;
		ena_aenq_set_hdlrs(aenq);
	} else {
		ena_dma_bzero(&aenq->eaenq_dma);
	}

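	/*
	 * The device writes its first pass through the ring with a phase of
	 * 1, toggling the phase each time it wraps; that is how
	 * ena_aenq_work() spots new entries. Starting the head at
	 * eaenq_num_descs keeps the head index at slot 0 while making the
	 * full ring available to the device once the doorbell is written.
	 */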
	aenq->eaenq_head = aenq->eaenq_num_descs;
	aenq->eaenq_phase = 1;

	addr_low = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress);
	addr_high = (uint32_t)(aenq->eaenq_dma.edb_cookie->dmac_laddress >> 32);
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_LO, addr_low);
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_BASE_HI, addr_high);
	ENA_DMA_SYNC(aenq->eaenq_dma, DDI_DMA_SYNC_FORDEV);
	wval = ENAHW_AENQ_CAPS_DEPTH(aenq->eaenq_num_descs) |
	    ENAHW_AENQ_CAPS_ENTRY_SIZE(sizeof (*aenq->eaenq_descs));
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_CAPS, wval);

	return (true);
}

void
ena_aenq_enable(ena_t *ena)
{
	/*
	 * We set this to zero here so that the watchdog will ignore it until
	 * the first keepalive event is received. On devices that do not
	 * support sending keepalives, this value will simply remain 0.
	 */
	ena->ena_watchdog_last_keepalive = 0;
	ena_hw_bar_write32(ena, ENAHW_REG_AENQ_HEAD_DB,
	    ena->ena_aenq.eaenq_head);
}

void
ena_aenq_free(ena_t *ena)
{
	ena_dma_free(&ena->ena_aenq.eaenq_dma);
}
