xref: /illumos-gate/usr/src/uts/common/io/ena/ena_gld.c (revision 1fa2a66491e7d8ae0be84e7da4da8e812480c710)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Oxide Computer Company
14  */
15 #include "ena.h"
16 
17 /*
18  * Group/Ring callbacks
19  */
20 
21 /*
22  * The ena driver supports only a single mac address: the one assigned
23  * to it by the hypervisor. If mac requests an address besides this
24  * one, then return ENOTSUP. This will prevent VNICs from being
25  * created, as it should.
26  */
27 static int
28 ena_group_add_mac(void *arg, const uint8_t *mac_addr)
29 {
30 	ena_t *ena = arg;
31 
32 	if (ETHER_IS_MULTICAST(mac_addr)) {
33 		return (EINVAL);
34 	}
35 
36 	if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) {
37 		return (0);
38 	}
39 
40 	return (ENOTSUP);
41 }
42 
43 static int
44 ena_group_rem_mac(void *arg, const uint8_t *mac_addr)
45 {
46 	ena_t *ena = arg;
47 
48 	if (ETHER_IS_MULTICAST(mac_addr)) {
49 		return (EINVAL);
50 	}
51 
52 	if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) {
53 		return (0);
54 	}
55 
56 	return (ENOTSUP);
57 }
58 
59 static int
60 ena_ring_rx_intr_disable(mac_intr_handle_t mih)
61 {
62 	ena_rxq_t *rxq = (ena_rxq_t *)mih;
63 	uint32_t intr_ctrl;
64 
65 	mutex_enter(&rxq->er_lock);
66 	intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
67 	ENAHW_REG_INTR_MASK(intr_ctrl);
68 	ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);
69 	rxq->er_mode = ENA_RXQ_MODE_POLLING;
70 	mutex_exit(&rxq->er_lock);
71 	return (0);
72 }
73 
74 static int
75 ena_ring_rx_intr_enable(mac_intr_handle_t mih)
76 {
77 	ena_rxq_t *rxq = (ena_rxq_t *)mih;
78 	uint32_t intr_ctrl;
79 
80 	mutex_enter(&rxq->er_lock);
81 	intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr);
82 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
83 	ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl);
84 	rxq->er_mode = ENA_RXQ_MODE_INTR;
85 	mutex_exit(&rxq->er_lock);
86 	return (0);
87 }
88 
89 static void
90 ena_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
91     mac_group_info_t *infop, mac_group_handle_t gh)
92 {
93 	ena_t *ena = arg;
94 
95 	VERIFY3S(rtype, ==, MAC_RING_TYPE_RX);
96 	/*
97 	 * Typically you pass an Rx group data structure as
98 	 * mgi_driver, but given we should only ever have one group we
99 	 * just pass the top-level ena_t.
100 	 */
101 	infop->mgi_driver = (mac_group_driver_t)ena;
102 	infop->mgi_start = NULL;
103 	infop->mgi_stop = NULL;
104 	infop->mgi_addmac = ena_group_add_mac;
105 	infop->mgi_remmac = ena_group_rem_mac;
106 	infop->mgi_count = ena->ena_num_intrs - 1;
107 }
108 
109 static void
110 ena_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
111     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
112 {
113 	ena_t *ena = arg;
114 	ena_txq_t *txq = &(ena->ena_txqs[ring_index]);
115 
116 	VERIFY3S(rtype, ==, MAC_RING_TYPE_TX);
117 	VERIFY3S(ring_index, <, ena->ena_num_txqs);
118 	/* Link driver Tx queue to mac ring handle and vice versa. */
119 	txq->et_mrh = rh;
120 	infop->mri_driver = (mac_ring_driver_t)txq;
121 	infop->mri_start = ena_ring_tx_start;
122 	infop->mri_stop = ena_ring_tx_stop;
123 	infop->mri_tx = ena_ring_tx;
124 	infop->mri_stat = ena_ring_tx_stat;
125 }
126 
127 static void
128 ena_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
129     const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
130 {
131 	ena_t *ena = arg;
132 	ena_rxq_t *rxq = &(ena->ena_rxqs[ring_index]);
133 
134 	VERIFY3S(rtype, ==, MAC_RING_TYPE_RX);
135 	VERIFY3S(ring_index, <, ena->ena_num_rxqs);
136 	rxq->er_mrh = rh;
137 	infop->mri_driver = (mac_ring_driver_t)rxq;
138 	infop->mri_start = ena_ring_rx_start;
139 	infop->mri_stop = ena_ring_rx_stop;
140 	infop->mri_poll = ena_ring_rx_poll;
141 	infop->mri_stat = ena_ring_rx_stat;
142 	infop->mri_intr.mi_handle = (mac_intr_handle_t)rxq;
143 	infop->mri_intr.mi_enable = ena_ring_rx_intr_enable;
144 	infop->mri_intr.mi_disable = ena_ring_rx_intr_disable;
145 	infop->mri_intr.mi_ddi_handle =
146 	    ena->ena_intr_handles[rxq->er_intr_vector];
147 }
148 
149 static int
150 ena_m_start(void *arg)
151 {
152 	ena_t *ena = arg;
153 
154 	atomic_or_32(&ena->ena_state, ENA_STATE_RUNNING);
155 	return (0);
156 }
157 
158 static void
159 ena_m_stop(void *arg)
160 {
161 	ena_t *ena = arg;
162 	atomic_and_32(&ena->ena_state, ~ENA_STATE_RUNNING);
163 }
164 
165 /*
166  * As discussed in ena_group_add_mac(), ENA only supports a single MAC
167  * address, and therefore we prevent VNICs from being created. That
168  * means there is no chance for promisc to be used as a means for
169  * implementing VNIC support on ENA, as we never allow them to be
170  * created in the first place.
171  *
172  * As for promisc itself, returning success is about the best we can
173  * do. There is no promisc API for an ENA device -- you get only the
174  * exact traffic AWS wants you to see.
175  */
176 static int
177 ena_m_setpromisc(void *arg, boolean_t on)
178 {
179 	return (0);
180 }
181 
182 /*
183  * Similarly to promisc, there is no multicast API for an ENA
184  * device.
185  */
186 static int
187 ena_m_multicast(void *arg, boolean_t add, const uint8_t *multicast_address)
188 {
189 	return (0);
190 }
191 
192 static boolean_t
193 ena_m_getcapab(void *arg, mac_capab_t capab, void *cap_data)
194 {
195 	ena_t *ena = arg;
196 	mac_capab_rings_t *cap_rings;
197 
198 	switch (capab) {
199 	case MAC_CAPAB_RINGS:
200 		cap_rings = cap_data;
201 		cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
202 		cap_rings->mr_gaddring = NULL;
203 		cap_rings->mr_gremring = NULL;
204 		ASSERT3U(ena->ena_num_intrs, >=, 2);
205 
206 		switch (cap_rings->mr_type) {
207 		case MAC_RING_TYPE_TX:
208 			/*
209 			 * We use pseudo Tx groups for now.
210 			 */
211 			cap_rings->mr_gnum = 0;
212 			cap_rings->mr_rnum = ena->ena_num_intrs - 1;
213 			cap_rings->mr_rget = ena_fill_tx_ring;
214 			break;
215 		case MAC_RING_TYPE_RX:
216 			cap_rings->mr_rnum = ena->ena_num_intrs - 1;
217 			cap_rings->mr_rget = ena_fill_rx_ring;
218 			/*
219 			 * The ENA device provides no means to add mac
220 			 * filters or set promisc mode; it's only
221 			 * meant to receive its pre-designated unicast
222 			 * address. However, we still want rings as
223 			 * the device does provide multiple queues and
224 			 * RSS.
225 			 */
226 			cap_rings->mr_gnum = 1;
227 			cap_rings->mr_gget = ena_fill_rx_group;
228 			break;
229 		}
230 
231 		break;
232 
233 	case MAC_CAPAB_HCKSUM:
234 	case MAC_CAPAB_LSO:
235 		return (B_FALSE);
236 	default:
237 		return (B_FALSE);
238 	}
239 
240 	return (B_TRUE);
241 }
242 
243 static int
244 ena_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
245     uint_t pr_valsize, const void *pr_val)
246 {
247 	return (ENOTSUP);
248 }
249 
250 static int
251 ena_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
252     uint_t pr_valsize, void *pr_val)
253 {
254 	ena_t *ena = arg;
255 	int ret = 0;
256 	uint64_t speed;
257 	uint8_t *u8;
258 
259 	mutex_enter(&ena->ena_lock);
260 
261 	switch (pr_num) {
262 	case MAC_PROP_DUPLEX:
263 		if (pr_valsize < sizeof (link_duplex_t)) {
264 			ret = EOVERFLOW;
265 			break;
266 		}
267 
268 		bcopy(&ena->ena_link_duplex, pr_val, sizeof (link_duplex_t));
269 		break;
270 
271 	case MAC_PROP_SPEED:
272 		if (pr_valsize < sizeof (uint64_t)) {
273 			ret = EOVERFLOW;
274 			break;
275 		}
276 
277 		speed = ena->ena_link_speed_mbits * 1000000ULL;
278 		bcopy(&speed, pr_val, sizeof (speed));
279 		break;
280 
281 	case MAC_PROP_STATUS:
282 		if (pr_valsize < sizeof (link_state_t)) {
283 			ret = EOVERFLOW;
284 			break;
285 		}
286 
287 		bcopy(&ena->ena_link_state, pr_val, sizeof (link_state_t));
288 		break;
289 
290 	case MAC_PROP_AUTONEG:
291 		if (pr_valsize < sizeof (uint8_t)) {
292 			ret = EOVERFLOW;
293 			break;
294 		}
295 
296 		u8 = pr_val;
297 		*u8 = (ena->ena_link_autoneg ? 0 : 1);
298 		break;
299 
300 	case MAC_PROP_MTU:
301 		if (pr_valsize < sizeof (uint32_t)) {
302 			ret = EOVERFLOW;
303 			break;
304 		}
305 
306 		bcopy(&ena->ena_mtu, pr_val, sizeof (uint32_t));
307 		break;
308 
309 	case MAC_PROP_ADV_1000FDX_CAP:
310 	case MAC_PROP_EN_1000FDX_CAP:
311 		if (pr_valsize < sizeof (uint8_t)) {
312 			ret = EOVERFLOW;
313 			break;
314 		}
315 
316 		u8 = pr_val;
317 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_1G) != 0;
318 		break;
319 
320 	case MAC_PROP_ADV_2500FDX_CAP:
321 	case MAC_PROP_EN_2500FDX_CAP:
322 		if (pr_valsize < sizeof (uint8_t)) {
323 			ret = EOVERFLOW;
324 			break;
325 		}
326 
327 		u8 = pr_val;
328 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_2_HALF_G) != 0;
329 		break;
330 
331 	case MAC_PROP_ADV_5000FDX_CAP:
332 	case MAC_PROP_EN_5000FDX_CAP:
333 		if (pr_valsize < sizeof (uint8_t)) {
334 			ret = EOVERFLOW;
335 			break;
336 		}
337 
338 		u8 = pr_val;
339 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_5G) != 0;
340 		break;
341 
342 	case MAC_PROP_ADV_10GFDX_CAP:
343 	case MAC_PROP_EN_10GFDX_CAP:
344 		if (pr_valsize < sizeof (uint8_t)) {
345 			ret = EOVERFLOW;
346 			break;
347 		}
348 
349 		u8 = pr_val;
350 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_10G) != 0;
351 		break;
352 
353 	case MAC_PROP_ADV_25GFDX_CAP:
354 	case MAC_PROP_EN_25GFDX_CAP:
355 		if (pr_valsize < sizeof (uint8_t)) {
356 			ret = EOVERFLOW;
357 			break;
358 		}
359 
360 		u8 = pr_val;
361 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_25G) != 0;
362 		break;
363 
364 	case MAC_PROP_ADV_40GFDX_CAP:
365 	case MAC_PROP_EN_40GFDX_CAP:
366 		if (pr_valsize < sizeof (uint8_t)) {
367 			ret = EOVERFLOW;
368 			break;
369 		}
370 
371 		u8 = pr_val;
372 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_40G) != 0;
373 		break;
374 
375 	case MAC_PROP_ADV_100GFDX_CAP:
376 	case MAC_PROP_EN_100GFDX_CAP:
377 		if (pr_valsize < sizeof (uint8_t)) {
378 			ret = EOVERFLOW;
379 			break;
380 		}
381 
382 		u8 = pr_val;
383 		*u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_100G) != 0;
384 		break;
385 
386 	default:
387 		ret = ENOTSUP;
388 		break;
389 	}
390 
391 	mutex_exit(&ena->ena_lock);
392 	return (ret);
393 }
394 
395 static void
396 ena_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
397     mac_prop_info_handle_t prh)
398 {
399 }
400 
401 static mac_callbacks_t ena_m_callbacks = {
402 	.mc_callbacks = MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO,
403 	.mc_getstat = ena_m_stat,
404 	.mc_start = ena_m_start,
405 	.mc_stop = ena_m_stop,
406 	.mc_setpromisc = ena_m_setpromisc,
407 	.mc_multicst = ena_m_multicast,
408 	.mc_getcapab = ena_m_getcapab,
409 	.mc_setprop = ena_m_setprop,
410 	.mc_getprop = ena_m_getprop,
411 	.mc_propinfo = ena_m_propinfo,
412 };
413 
414 int
415 ena_mac_unregister(ena_t *ena)
416 {
417 	if (ena->ena_mh == NULL) {
418 		return (0);
419 	}
420 
421 	return (mac_unregister(ena->ena_mh));
422 }
423 
424 boolean_t
425 ena_mac_register(ena_t *ena)
426 {
427 	int ret;
428 	mac_register_t *regp;
429 
430 	if ((regp = mac_alloc(MAC_VERSION)) == NULL) {
431 		ena_err(ena, "failed to allocate MAC handle");
432 		return (B_FALSE);
433 	}
434 
435 	regp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
436 	regp->m_driver = ena;
437 	regp->m_dip = ena->ena_dip;
438 	regp->m_instance = 0;
439 	regp->m_src_addr = ena->ena_mac_addr;
440 	regp->m_dst_addr = NULL;
441 	regp->m_callbacks = &ena_m_callbacks;
442 	regp->m_min_sdu = 0;
443 	regp->m_max_sdu = ena->ena_mtu;
444 	regp->m_pdata = NULL;
445 	regp->m_pdata_size = 0;
446 	regp->m_priv_props = NULL;
447 	regp->m_margin = VLAN_TAGSZ;
448 	regp->m_v12n = MAC_VIRT_LEVEL1;
449 
450 	if ((ret = mac_register(regp, &ena->ena_mh)) != 0) {
451 		ena_err(ena, "failed to register ena with mac: %d", ret);
452 	}
453 
454 	mac_free(regp);
455 
456 	if (ret == 0) {
457 		/*
458 		 * Until we get the first AENQ link change event, we
459 		 * do not actually know the status of the link.
460 		 */
461 		mac_link_update(ena->ena_mh, LINK_STATE_UNKNOWN);
462 	}
463 
464 	return (ret == 0);
465 }
466