1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2021 Oxide Computer Company 14 */ 15 #include "ena.h" 16 17 /* 18 * Group/Ring callbacks 19 */ 20 21 /* 22 * The ena driver supports only a single mac address: the one assigned 23 * to it by the hypervisor. If mac requests an address besides this 24 * one, then return ENOTSUP. This will prevent VNICs from being 25 * created, as it should. 26 */ 27 static int 28 ena_group_add_mac(void *arg, const uint8_t *mac_addr) 29 { 30 ena_t *ena = arg; 31 32 if (ETHER_IS_MULTICAST(mac_addr)) { 33 return (EINVAL); 34 } 35 36 if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) { 37 return (0); 38 } 39 40 return (ENOTSUP); 41 } 42 43 static int 44 ena_group_rem_mac(void *arg, const uint8_t *mac_addr) 45 { 46 ena_t *ena = arg; 47 48 if (ETHER_IS_MULTICAST(mac_addr)) { 49 return (EINVAL); 50 } 51 52 if (bcmp(ena->ena_mac_addr, mac_addr, ETHERADDRL) == 0) { 53 return (0); 54 } 55 56 return (ENOTSUP); 57 } 58 59 static int 60 ena_ring_rx_intr_disable(mac_intr_handle_t mih) 61 { 62 ena_rxq_t *rxq = (ena_rxq_t *)mih; 63 uint32_t intr_ctrl; 64 65 mutex_enter(&rxq->er_lock); 66 intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr); 67 ENAHW_REG_INTR_MASK(intr_ctrl); 68 ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl); 69 rxq->er_mode = ENA_RXQ_MODE_POLLING; 70 mutex_exit(&rxq->er_lock); 71 return (0); 72 } 73 74 static int 75 ena_ring_rx_intr_enable(mac_intr_handle_t mih) 76 { 77 ena_rxq_t *rxq = (ena_rxq_t *)mih; 78 uint32_t intr_ctrl; 79 80 mutex_enter(&rxq->er_lock); 81 intr_ctrl = ena_hw_abs_read32(rxq->er_ena, rxq->er_cq_unmask_addr); 82 ENAHW_REG_INTR_UNMASK(intr_ctrl); 83 ena_hw_abs_write32(rxq->er_ena, rxq->er_cq_unmask_addr, intr_ctrl); 84 rxq->er_mode = ENA_RXQ_MODE_INTR; 85 mutex_exit(&rxq->er_lock); 86 return (0); 87 } 88 89 static void 90 ena_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index, 91 mac_group_info_t *infop, mac_group_handle_t gh) 92 { 93 ena_t *ena = arg; 94 95 VERIFY3S(rtype, ==, MAC_RING_TYPE_RX); 96 /* 97 * Typically you pass an Rx group data structure as 98 * mgi_driver, but given we should only ever have one group we 99 * just pass the top-level ena_t. 100 */ 101 infop->mgi_driver = (mac_group_driver_t)ena; 102 infop->mgi_start = NULL; 103 infop->mgi_stop = NULL; 104 infop->mgi_addmac = ena_group_add_mac; 105 infop->mgi_remmac = ena_group_rem_mac; 106 infop->mgi_count = ena->ena_num_intrs - 1; 107 } 108 109 static void 110 ena_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index, 111 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 112 { 113 ena_t *ena = arg; 114 ena_txq_t *txq = &(ena->ena_txqs[ring_index]); 115 116 VERIFY3S(rtype, ==, MAC_RING_TYPE_TX); 117 VERIFY3S(ring_index, <, ena->ena_num_txqs); 118 /* Link driver Tx queue to mac ring handle and vice versa. */ 119 txq->et_mrh = rh; 120 infop->mri_driver = (mac_ring_driver_t)txq; 121 infop->mri_start = ena_ring_tx_start; 122 infop->mri_stop = ena_ring_tx_stop; 123 infop->mri_tx = ena_ring_tx; 124 infop->mri_stat = ena_ring_tx_stat; 125 } 126 127 static void 128 ena_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index, 129 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 130 { 131 ena_t *ena = arg; 132 ena_rxq_t *rxq = &(ena->ena_rxqs[ring_index]); 133 134 VERIFY3S(rtype, ==, MAC_RING_TYPE_RX); 135 VERIFY3S(ring_index, <, ena->ena_num_rxqs); 136 rxq->er_mrh = rh; 137 infop->mri_driver = (mac_ring_driver_t)rxq; 138 infop->mri_start = ena_ring_rx_start; 139 infop->mri_stop = ena_ring_rx_stop; 140 infop->mri_poll = ena_ring_rx_poll; 141 infop->mri_stat = ena_ring_rx_stat; 142 infop->mri_intr.mi_handle = (mac_intr_handle_t)rxq; 143 infop->mri_intr.mi_enable = ena_ring_rx_intr_enable; 144 infop->mri_intr.mi_disable = ena_ring_rx_intr_disable; 145 infop->mri_intr.mi_ddi_handle = 146 ena->ena_intr_handles[rxq->er_intr_vector]; 147 } 148 149 static int 150 ena_m_start(void *arg) 151 { 152 ena_t *ena = arg; 153 154 atomic_or_32(&ena->ena_state, ENA_STATE_RUNNING); 155 return (0); 156 } 157 158 static void 159 ena_m_stop(void *arg) 160 { 161 ena_t *ena = arg; 162 atomic_and_32(&ena->ena_state, ~ENA_STATE_RUNNING); 163 } 164 165 /* 166 * As discussed in ena_group_add_mac(), ENA only supports a single MAC 167 * address, and therefore we prevent VNICs from being created. That 168 * means there is no chance for promisc to be used as a means for 169 * implementing VNIC support on ENA, as we never allow them to be 170 * created in the first place. 171 * 172 * As for promisc itself, returning success is about the best we can 173 * do. There is no promisc API for an ENA device -- you get only the 174 * exact traffic AWS wants you to see. 175 */ 176 static int 177 ena_m_setpromisc(void *arg, boolean_t on) 178 { 179 return (0); 180 } 181 182 /* 183 * Similarly to promisc, there is no multicast API for an ENA 184 * device. 185 */ 186 static int 187 ena_m_multicast(void *arg, boolean_t add, const uint8_t *multicast_address) 188 { 189 return (0); 190 } 191 192 static boolean_t 193 ena_m_getcapab(void *arg, mac_capab_t capab, void *cap_data) 194 { 195 ena_t *ena = arg; 196 mac_capab_rings_t *cap_rings; 197 198 switch (capab) { 199 case MAC_CAPAB_RINGS: 200 cap_rings = cap_data; 201 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 202 cap_rings->mr_gaddring = NULL; 203 cap_rings->mr_gremring = NULL; 204 ASSERT3U(ena->ena_num_intrs, >=, 2); 205 206 switch (cap_rings->mr_type) { 207 case MAC_RING_TYPE_TX: 208 /* 209 * We use pseudo Tx groups for now. 210 */ 211 cap_rings->mr_gnum = 0; 212 cap_rings->mr_rnum = ena->ena_num_intrs - 1; 213 cap_rings->mr_rget = ena_fill_tx_ring; 214 break; 215 case MAC_RING_TYPE_RX: 216 cap_rings->mr_rnum = ena->ena_num_intrs - 1; 217 cap_rings->mr_rget = ena_fill_rx_ring; 218 /* 219 * The ENA device provides no means to add mac 220 * filters or set promisc mode; it's only 221 * meant to receive its pre-designated unicast 222 * address. However, we still want rings as 223 * the device does provide multiple queues and 224 * RSS. 225 */ 226 cap_rings->mr_gnum = 1; 227 cap_rings->mr_gget = ena_fill_rx_group; 228 break; 229 } 230 231 break; 232 233 case MAC_CAPAB_HCKSUM: 234 case MAC_CAPAB_LSO: 235 return (B_FALSE); 236 default: 237 return (B_FALSE); 238 } 239 240 return (B_TRUE); 241 } 242 243 static int 244 ena_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 245 uint_t pr_valsize, const void *pr_val) 246 { 247 return (ENOTSUP); 248 } 249 250 static int 251 ena_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 252 uint_t pr_valsize, void *pr_val) 253 { 254 ena_t *ena = arg; 255 int ret = 0; 256 uint64_t speed; 257 uint8_t *u8; 258 259 mutex_enter(&ena->ena_lock); 260 261 switch (pr_num) { 262 case MAC_PROP_DUPLEX: 263 if (pr_valsize < sizeof (link_duplex_t)) { 264 ret = EOVERFLOW; 265 break; 266 } 267 268 bcopy(&ena->ena_link_duplex, pr_val, sizeof (link_duplex_t)); 269 break; 270 271 case MAC_PROP_SPEED: 272 if (pr_valsize < sizeof (uint64_t)) { 273 ret = EOVERFLOW; 274 break; 275 } 276 277 speed = ena->ena_link_speed_mbits * 1000000ULL; 278 bcopy(&speed, pr_val, sizeof (speed)); 279 break; 280 281 case MAC_PROP_STATUS: 282 if (pr_valsize < sizeof (link_state_t)) { 283 ret = EOVERFLOW; 284 break; 285 } 286 287 bcopy(&ena->ena_link_state, pr_val, sizeof (link_state_t)); 288 break; 289 290 case MAC_PROP_AUTONEG: 291 if (pr_valsize < sizeof (uint8_t)) { 292 ret = EOVERFLOW; 293 break; 294 } 295 296 u8 = pr_val; 297 *u8 = (ena->ena_link_autoneg ? 0 : 1); 298 break; 299 300 case MAC_PROP_MTU: 301 if (pr_valsize < sizeof (uint32_t)) { 302 ret = EOVERFLOW; 303 break; 304 } 305 306 bcopy(&ena->ena_mtu, pr_val, sizeof (uint32_t)); 307 break; 308 309 case MAC_PROP_ADV_1000FDX_CAP: 310 case MAC_PROP_EN_1000FDX_CAP: 311 if (pr_valsize < sizeof (uint8_t)) { 312 ret = EOVERFLOW; 313 break; 314 } 315 316 u8 = pr_val; 317 *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_1G) != 0; 318 break; 319 320 case MAC_PROP_ADV_2500FDX_CAP: 321 case MAC_PROP_EN_2500FDX_CAP: 322 if (pr_valsize < sizeof (uint8_t)) { 323 ret = EOVERFLOW; 324 break; 325 } 326 327 u8 = pr_val; 328 *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_2_HALF_G) != 0; 329 break; 330 331 case MAC_PROP_ADV_5000FDX_CAP: 332 case MAC_PROP_EN_5000FDX_CAP: 333 if (pr_valsize < sizeof (uint8_t)) { 334 ret = EOVERFLOW; 335 break; 336 } 337 338 u8 = pr_val; 339 *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_5G) != 0; 340 break; 341 342 case MAC_PROP_ADV_10GFDX_CAP: 343 case MAC_PROP_EN_10GFDX_CAP: 344 if (pr_valsize < sizeof (uint8_t)) { 345 ret = EOVERFLOW; 346 break; 347 } 348 349 u8 = pr_val; 350 *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_10G) != 0; 351 break; 352 353 case MAC_PROP_ADV_25GFDX_CAP: 354 case MAC_PROP_EN_25GFDX_CAP: 355 if (pr_valsize < sizeof (uint8_t)) { 356 ret = EOVERFLOW; 357 break; 358 } 359 360 u8 = pr_val; 361 *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_25G) != 0; 362 break; 363 364 case MAC_PROP_ADV_40GFDX_CAP: 365 case MAC_PROP_EN_40GFDX_CAP: 366 if (pr_valsize < sizeof (uint8_t)) { 367 ret = EOVERFLOW; 368 break; 369 } 370 371 u8 = pr_val; 372 *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_40G) != 0; 373 break; 374 375 case MAC_PROP_ADV_100GFDX_CAP: 376 case MAC_PROP_EN_100GFDX_CAP: 377 if (pr_valsize < sizeof (uint8_t)) { 378 ret = EOVERFLOW; 379 break; 380 } 381 382 u8 = pr_val; 383 *u8 = (ena->ena_link_speeds & ENAHW_LINK_SPEED_100G) != 0; 384 break; 385 386 default: 387 ret = ENOTSUP; 388 break; 389 } 390 391 mutex_exit(&ena->ena_lock); 392 return (ret); 393 } 394 395 static void 396 ena_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, 397 mac_prop_info_handle_t prh) 398 { 399 } 400 401 static mac_callbacks_t ena_m_callbacks = { 402 .mc_callbacks = MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO, 403 .mc_getstat = ena_m_stat, 404 .mc_start = ena_m_start, 405 .mc_stop = ena_m_stop, 406 .mc_setpromisc = ena_m_setpromisc, 407 .mc_multicst = ena_m_multicast, 408 .mc_getcapab = ena_m_getcapab, 409 .mc_setprop = ena_m_setprop, 410 .mc_getprop = ena_m_getprop, 411 .mc_propinfo = ena_m_propinfo, 412 }; 413 414 int 415 ena_mac_unregister(ena_t *ena) 416 { 417 if (ena->ena_mh == NULL) { 418 return (0); 419 } 420 421 return (mac_unregister(ena->ena_mh)); 422 } 423 424 boolean_t 425 ena_mac_register(ena_t *ena) 426 { 427 int ret; 428 mac_register_t *regp; 429 430 if ((regp = mac_alloc(MAC_VERSION)) == NULL) { 431 ena_err(ena, "failed to allocate MAC handle"); 432 return (B_FALSE); 433 } 434 435 regp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 436 regp->m_driver = ena; 437 regp->m_dip = ena->ena_dip; 438 regp->m_instance = 0; 439 regp->m_src_addr = ena->ena_mac_addr; 440 regp->m_dst_addr = NULL; 441 regp->m_callbacks = &ena_m_callbacks; 442 regp->m_min_sdu = 0; 443 regp->m_max_sdu = ena->ena_mtu; 444 regp->m_pdata = NULL; 445 regp->m_pdata_size = 0; 446 regp->m_priv_props = NULL; 447 regp->m_margin = VLAN_TAGSZ; 448 regp->m_v12n = MAC_VIRT_LEVEL1; 449 450 if ((ret = mac_register(regp, &ena->ena_mh)) != 0) { 451 ena_err(ena, "failed to register ena with mac: %d", ret); 452 } 453 454 mac_free(regp); 455 456 if (ret == 0) { 457 /* 458 * Until we get the first AENQ link change event, we 459 * do not actually know the status of the link. 460 */ 461 mac_link_update(ena->ena_mh, LINK_STATE_UNKNOWN); 462 } 463 464 return (ret == 0); 465 } 466