xref: /illumos-gate/usr/src/uts/common/io/ena/ena_gld.c (revision 590e0b5da08d7261161e979afc4bf4aa0f543574)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 #include "ena.h"
17 
18 /*
19  * Group/Ring callbacks
20  */
21 
22 /*
23  * The ena driver supports only a single mac address: the one assigned
24  * to it by the hypervisor. If mac requests an address besides this
25  * one, then return ENOTSUP. This will prevent VNICs from being
26  * created, as it should.
27  */
28 static int
29 ena_group_add_mac(void *arg, const uint8_t *mac_addr)
30 {
31 	ena_t *ena = arg;
32 
33 	if (ETHER_IS_MULTICAST(mac_addr)) {
34 		return (EINVAL);
35 	}
36 
37 	if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) {
38 		return (0);
39 	}
40 
41 	return (ENOTSUP);
42 }
43 
44 static int
45 ena_group_rem_mac(void *arg, const uint8_t *mac_addr)
46 {
47 	ena_t *ena = arg;
48 
49 	if (ETHER_IS_MULTICAST(mac_addr)) {
50 		return (EINVAL);
51 	}
52 
53 	if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) {
54 		return (0);
55 	}
56 
57 	return (ENOTSUP);
58 }
59 
60 static int
61 ena_ring_rx_intr_disable(mac_intr_handle_t mih)
62 {
63 	ena_rxq_t *rxq = (ena_rxq_t *)mih;
64 	uint32_t intr_ctrl;
65 
66 	mutex_enter(&rxq->er_lock);
67 	intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
68 	ENAHW_REG_INTR_MASK(intr_ctrl);
69 	ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);
70 	rxq->er_mode = ENA_RXQ_MODE_POLLING;
71 	mutex_exit(&rxq->er_lock);
72 	return (0);
73 }
74 
75 static int
76 ena_ring_rx_intr_enable(mac_intr_handle_t mih)
77 {
78 	ena_rxq_t *rxq = (ena_rxq_t *)mih;
79 	uint32_t intr_ctrl;
80 
81 	mutex_enter(&rxq->er_lock);
82 	intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
83 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
84 	ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);
85 	rxq->er_mode = ENA_RXQ_MODE_INTR;
86 	mutex_exit(&rxq->er_lock);
87 	return (0);
88 }
89 
90 static void
91 ena_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
92     mac_group_info_t *infop, mac_group_handle_t gh)
93 {
94 	ena_t *ena = arg;
95 
96 	VERIFY3S(rtype, ==, MAC_RING_TYPE_RX);
97 	/*
98 	 * Typically you pass an Rx group data structure as
99 	 * mgi_driver, but given we should only ever have one group we
100 	 * just pass the top-level ena_t.
101 	 */
102 	infop->mgi_driver = (mac_group_driver_t)ena;
103 	infop->mgi_start = NULL;
104 	infop->mgi_stop = NULL;
105 	infop->mgi_addmac = ena_group_add_mac;
106 	infop->mgi_remmac = ena_group_rem_mac;
107 	infop->mgi_count = ena->ena_num_intrs - 1;
108 }
109 
110 static void
111 ena_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
112     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
113 {
114 	ena_t *ena = arg;
115 	ena_txq_t *txq = &ena->ena_txqs[ring_index];
116 
117 	VERIFY3S(rtype, ==, MAC_RING_TYPE_TX);
118 	VERIFY3S(ring_index, <, ena->ena_num_txqs);
119 	/* Link driver Tx queue to mac ring handle and vice versa. */
120 	txq->et_mrh = rh;
121 	infop->mri_driver = (mac_ring_driver_t)txq;
122 	infop->mri_start = ena_ring_tx_start;
123 	infop->mri_stop = ena_ring_tx_stop;
124 	infop->mri_tx = ena_ring_tx;
125 	infop->mri_stat = ena_ring_tx_stat;
126 }
127 
128 static void
129 ena_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
130     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
131 {
132 	ena_t *ena = arg;
133 	ena_rxq_t *rxq = &ena->ena_rxqs[ring_index];
134 
135 	VERIFY3S(rtype, ==, MAC_RING_TYPE_RX);
136 	VERIFY3S(ring_index, <, ena->ena_num_rxqs);
137 	rxq->er_mrh = rh;
138 	infop->mri_driver = (mac_ring_driver_t)rxq;
139 	infop->mri_start = ena_ring_rx_start;
140 	infop->mri_stop = ena_ring_rx_stop;
141 	infop->mri_poll = ena_ring_rx_poll;
142 	infop->mri_stat = ena_ring_rx_stat;
143 	infop->mri_intr.mi_handle = (mac_intr_handle_t)rxq;
144 	infop->mri_intr.mi_enable = ena_ring_rx_intr_enable;
145 	infop->mri_intr.mi_disable = ena_ring_rx_intr_disable;
146 	infop->mri_intr.mi_ddi_handle =
147 	    ena->ena_intr_handles[rxq->er_intr_vector];
148 }
149 
150 static int
151 ena_m_start(void *arg)
152 {
153 	ena_t *ena = arg;
154 
155 	atomic_or_32(&ena->ena_state, ENA_STATE_STARTED);
156 	ena_enable_watchdog(ena);
157 
158 	return (0);
159 }
160 
161 static void
162 ena_m_stop(void *arg)
163 {
164 	ena_t *ena = arg;
165 
166 	ena_disable_watchdog(ena);
167 	atomic_and_32(&ena->ena_state, ~ENA_STATE_STARTED);
168 }
169 
170 /*
171  * As discussed in ena_group_add_mac(), ENA only supports a single MAC
172  * address, and therefore we prevent VNICs from being created. That
173  * means there is no chance for promisc to be used as a means for
174  * implementing VNIC support on ENA, as we never allow them to be
175  * created in the first place.
176  *
177  * As for promisc itself, returning success is about the best we can
178  * do. There is no promisc API for an ENA device -- you get only the
179  * exact traffic AWS wants you to see.
180  */
181 static int
182 ena_m_setpromisc(void *arg, boolean_t on)
183 {
184 	return (0);
185 }
186 
187 /*
188  * Similarly to promisc, there is no multicast API for an ENA
189  * device.
190  */
191 static int
192 ena_m_multicast(void *arg, boolean_t add, const uint8_t *multicast_address)
193 {
194 	return (0);
195 }
196 
197 static boolean_t
198 ena_m_getcapab(void *arg, mac_capab_t capab, void *cap_data)
199 {
200 	ena_t *ena = arg;
201 	mac_capab_rings_t *cap_rings;
202 
203 	switch (capab) {
204 	case MAC_CAPAB_RINGS:
205 		cap_rings = cap_data;
206 		cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
207 		cap_rings->mr_gaddring = NULL;
208 		cap_rings->mr_gremring = NULL;
209 		ASSERT3U(ena->ena_num_intrs, >=, 2);
210 
211 		switch (cap_rings->mr_type) {
212 		case MAC_RING_TYPE_TX:
213 			/*
214 			 * We use pseudo Tx groups for now.
215 			 */
216 			cap_rings->mr_gnum = 0;
217 			cap_rings->mr_rnum = ena->ena_num_intrs - 1;
218 			cap_rings->mr_rget = ena_fill_tx_ring;
219 			break;
220 		case MAC_RING_TYPE_RX:
221 			cap_rings->mr_rnum = ena->ena_num_intrs - 1;
222 			cap_rings->mr_rget = ena_fill_rx_ring;
223 			/*
224 			 * The ENA device provides no means to add mac
225 			 * filters or set promisc mode; it's only
226 			 * meant to receive its pre-designated unicast
227 			 * address. However, we still want rings as
228 			 * the device does provide multiple queues and
229 			 * RSS.
230 			 */
231 			cap_rings->mr_gnum = 1;
232 			cap_rings->mr_gget = ena_fill_rx_group;
233 			break;
234 		}
235 
236 		break;
237 
238 	case MAC_CAPAB_HCKSUM:
239 	case MAC_CAPAB_LSO:
240 		return (B_FALSE);
241 	default:
242 		return (B_FALSE);
243 	}
244 
245 	return (B_TRUE);
246 }
247 
248 static int
249 ena_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
250     uint_t pr_valsize, const void *pr_val)
251 {
252 	return (ENOTSUP);
253 }
254 
255 static int
256 ena_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
257     uint_t pr_valsize, void *pr_val)
258 {
259 	ena_t *ena = arg;
260 	int ret = 0;
261 	uint64_t speed;
262 	uint8_t *u8;
263 
264 	mutex_enter(&ena->ena_lock);
265 
266 	switch (pr_num) {
267 	case MAC_PROP_DUPLEX:
268 		if (pr_valsize < sizeof (link_duplex_t)) {
269 			ret = EOVERFLOW;
270 			break;
271 		}
272 
273 		bcopy(&ena->ena_link_duplex, pr_val, sizeof (link_duplex_t));
274 		break;
275 
276 	case MAC_PROP_SPEED:
277 		if (pr_valsize < sizeof (uint64_t)) {
278 			ret = EOVERFLOW;
279 			break;
280 		}
281 
282 		speed = ena->ena_link_speed_mbits * 1000000ULL;
283 		bcopy(&speed, pr_val, sizeof (speed));
284 		break;
285 
286 	case MAC_PROP_STATUS:
287 		if (pr_valsize < sizeof (link_state_t)) {
288 			ret = EOVERFLOW;
289 			break;
290 		}
291 
292 		bcopy(&ena->ena_link_state, pr_val, sizeof (link_state_t));
293 		break;
294 
295 	case MAC_PROP_AUTONEG:
296 		if (pr_valsize < sizeof (uint8_t)) {
297 			ret = EOVERFLOW;
298 			break;
299 		}
300 
301 		u8 = pr_val;
302 		*u8 = (ena->ena_link_autoneg ? 0 : 1);
303 		break;
304 
305 	case MAC_PROP_MTU:
306 		if (pr_valsize < sizeof (uint32_t)) {
307 			ret = EOVERFLOW;
308 			break;
309 		}
310 
311 		bcopy(&ena->ena_mtu, pr_val, sizeof (uint32_t));
312 		break;
313 
314 	case MAC_PROP_ADV_1000FDX_CAP:
315 	case MAC_PROP_EN_1000FDX_CAP:
316 		if (pr_valsize < sizeof (uint8_t)) {
317 			ret = EOVERFLOW;
318 			break;
319 		}
320 
321 		u8 = pr_val;
322 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_1G) != 0;
323 		break;
324 
325 	case MAC_PROP_ADV_2500FDX_CAP:
326 	case MAC_PROP_EN_2500FDX_CAP:
327 		if (pr_valsize < sizeof (uint8_t)) {
328 			ret = EOVERFLOW;
329 			break;
330 		}
331 
332 		u8 = pr_val;
333 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_2_HALF_G) != 0;
334 		break;
335 
336 	case MAC_PROP_ADV_5000FDX_CAP:
337 	case MAC_PROP_EN_5000FDX_CAP:
338 		if (pr_valsize < sizeof (uint8_t)) {
339 			ret = EOVERFLOW;
340 			break;
341 		}
342 
343 		u8 = pr_val;
344 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_5G) != 0;
345 		break;
346 
347 	case MAC_PROP_ADV_10GFDX_CAP:
348 	case MAC_PROP_EN_10GFDX_CAP:
349 		if (pr_valsize < sizeof (uint8_t)) {
350 			ret = EOVERFLOW;
351 			break;
352 		}
353 
354 		u8 = pr_val;
355 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_10G) != 0;
356 		break;
357 
358 	case MAC_PROP_ADV_25GFDX_CAP:
359 	case MAC_PROP_EN_25GFDX_CAP:
360 		if (pr_valsize < sizeof (uint8_t)) {
361 			ret = EOVERFLOW;
362 			break;
363 		}
364 
365 		u8 = pr_val;
366 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_25G) != 0;
367 		break;
368 
369 	case MAC_PROP_ADV_40GFDX_CAP:
370 	case MAC_PROP_EN_40GFDX_CAP:
371 		if (pr_valsize < sizeof (uint8_t)) {
372 			ret = EOVERFLOW;
373 			break;
374 		}
375 
376 		u8 = pr_val;
377 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_40G) != 0;
378 		break;
379 
380 	case MAC_PROP_ADV_100GFDX_CAP:
381 	case MAC_PROP_EN_100GFDX_CAP:
382 		if (pr_valsize < sizeof (uint8_t)) {
383 			ret = EOVERFLOW;
384 			break;
385 		}
386 
387 		u8 = pr_val;
388 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_100G) != 0;
389 		break;
390 
391 	default:
392 		ret = ENOTSUP;
393 		break;
394 	}
395 
396 	mutex_exit(&ena->ena_lock);
397 	return (ret);
398 }
399 
400 static void
401 ena_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
402     mac_prop_info_handle_t prh)
403 {
404 }
405 
406 static mac_callbacks_t ena_m_callbacks = {
407 	.mc_callbacks = MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO,
408 	.mc_getstat = ena_m_stat,
409 	.mc_start = ena_m_start,
410 	.mc_stop = ena_m_stop,
411 	.mc_setpromisc = ena_m_setpromisc,
412 	.mc_multicst = ena_m_multicast,
413 	.mc_getcapab = ena_m_getcapab,
414 	.mc_setprop = ena_m_setprop,
415 	.mc_getprop = ena_m_getprop,
416 	.mc_propinfo = ena_m_propinfo,
417 };
418 
419 int
420 ena_mac_unregister(ena_t *ena)
421 {
422 	if (ena->ena_mh == NULL) {
423 		return (0);
424 	}
425 
426 	return (mac_unregister(ena->ena_mh));
427 }
428 
429 bool
430 ena_mac_register(ena_t *ena)
431 {
432 	int ret;
433 	mac_register_t *regp;
434 
435 	if ((regp = mac_alloc(MAC_VERSION)) == NULL) {
436 		ena_err(ena, "failed to allocate MAC handle");
437 		return (false);
438 	}
439 
440 	regp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
441 	regp->m_driver = ena;
442 	regp->m_dip = ena->ena_dip;
443 	regp->m_instance = 0;
444 	regp->m_src_addr = ena->ena_mac_addr;
445 	regp->m_dst_addr = NULL;
446 	regp->m_callbacks = &ena_m_callbacks;
447 	regp->m_min_sdu = 0;
448 	regp->m_max_sdu = ena->ena_mtu;
449 	regp->m_pdata = NULL;
450 	regp->m_pdata_size = 0;
451 	regp->m_priv_props = NULL;
452 	regp->m_margin = VLAN_TAGSZ;
453 	regp->m_v12n = MAC_VIRT_LEVEL1;
454 
455 	if ((ret = mac_register(regp, &ena->ena_mh)) != 0) {
456 		ena_err(ena, "failed to register ena with mac: %d", ret);
457 	}
458 
459 	mac_free(regp);
460 
461 	if (ret == 0) {
462 		/*
463 		 * Until we get the first AENQ link change event, we
464 		 * do not actually know the status of the link.
465 		 */
466 		mac_link_update(ena->ena_mh, LINK_STATE_UNKNOWN);
467 	}
468 
469 	return (ret == 0);
470 }
471