/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2021, the University of Queensland
 * Copyright 2020 RackTop Systems, Inc.
 * Copyright 2023 MNX Cloud, Inc.
 */

/*
 * Mellanox Connect-X 4/5/6 driver.
 */

#include <sys/modctl.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/sysmacros.h>
#include <sys/vlan.h>

#include <sys/pattr.h>
#include <sys/dlpi.h>

#include <sys/mac_provider.h>

/* Need these for mac_vlan_header_info() */
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>

#include <mlxcx.h>

static char *mlxcx_priv_props[] = {
	NULL
};

#define	MBITS	1000000ULL
#define	GBITS	(1000ULL * MBITS)

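/*
 * Translate the operational protocol bits reported for the port (and, on
 * newer parts, the extended protocol bits) into a link speed in bits per
 * second. Unrecognised values yield 0.
 */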
static uint64_t
mlxcx_speed_to_bits(mlxcx_eth_proto_t proto, mlxcx_ext_eth_proto_t ext_proto)
{
	/*
	 * Older parts only used "proto", but starting with ConnectX-6, there
	 * might be speeds & link-types in an extended set of proto bits.
	 *
	 * We check the old bits first because the extended bits do not report
	 * media on links (e.g. nothing like MLXCX_EXTPROTO_100GBASE_CR2
	 * for a 50Gbit lane).
	 *
	 * In the case of, e.g., 100GBASE_CR4 both proto and ext_proto have
	 * bits set, but the extended proto bits are a generic CAUI4 indicator
	 * that could be for CR4, KR4, etc. If we get a legitimate single-bit
	 * value, we don't worry about ext_proto. This may change in the face
	 * of other HW or cabling, however.
	 */
	switch (proto) {
	case MLXCX_PROTO_NONE:	/* Aka "0" */
		/* Go straight to checking ext_proto. */
		break;
	case MLXCX_PROTO_SGMII_100BASE:
	case MLXCX_PROTO_100BASE_TX:
		return (100ULL * MBITS);
	case MLXCX_PROTO_SGMII:
	case MLXCX_PROTO_1000BASE_KX:
	case MLXCX_PROTO_1000BASE_T:
		return (1000ULL * MBITS);
	case MLXCX_PROTO_10GBASE_CX4:
	case MLXCX_PROTO_10GBASE_KX4:
	case MLXCX_PROTO_10GBASE_KR:
	case MLXCX_PROTO_10GBASE_CR:
	case MLXCX_PROTO_10GBASE_SR:
	case MLXCX_PROTO_10GBASE_ER_LR:
	case MLXCX_PROTO_10GBASE_T:
		return (10ULL * GBITS);
	case MLXCX_PROTO_40GBASE_CR4:
	case MLXCX_PROTO_40GBASE_KR4:
	case MLXCX_PROTO_40GBASE_SR4:
	case MLXCX_PROTO_40GBASE_LR4_ER4:
		return (40ULL * GBITS);
	case MLXCX_PROTO_25GBASE_CR:
	case MLXCX_PROTO_25GBASE_KR:
	case MLXCX_PROTO_25GBASE_SR:
		return (25ULL * GBITS);
	case MLXCX_PROTO_50GBASE_SR2:
	case MLXCX_PROTO_50GBASE_CR2:
	case MLXCX_PROTO_50GBASE_KR2:
		return (50ULL * GBITS);
	case MLXCX_PROTO_100GBASE_CR4:
	case MLXCX_PROTO_100GBASE_SR4:
	case MLXCX_PROTO_100GBASE_KR4:
	case MLXCX_PROTO_100GBASE_LR4_ER4:
		return (100ULL * GBITS);
	default:
		/*
		 * We've checked for 0 explicitly above, so don't worry here.
		 *
		 * There ARE legitimate single-bit values we don't support,
		 * and should just return 0 immediately. We will ASSERT()
		 * that it's a single-bit value, however, since the passed-in
		 * values are from the "operational" register, which is only
		 * supposed to have one bit set. If the assertion fails
		 * there's either a hardware error or a severe
		 * misunderstanding of the register.
		 */
		ASSERT0((uint32_t)proto & ((uint32_t)proto - 1U));
		return (0);
	}

	switch (ext_proto) {
	case MLXCX_EXTPROTO_SGMII_100BASE:
		return (100ULL * MBITS);
	case MLXCX_EXTPROTO_1000BASE_X_SGMII:
		return (1000ULL * MBITS);
	case MLXCX_EXTPROTO_5GBASE_R:
		return (5ULL * GBITS);
	case MLXCX_EXTPROTO_10GBASE_XFI_XAUI_1:
		return (10ULL * GBITS);
	case MLXCX_EXTPROTO_40GBASE_XLAUI_4_XLPPI_4:
		return (40ULL * GBITS);
	case MLXCX_EXTPROTO_25GAUI_1_25GBASE_CR_KR:
		return (25ULL * GBITS);
	case MLXCX_EXTPROTO_50GAUI_2_LAUI_2_50GBASE_CR2_KR2:
	case MLXCX_EXTPROTO_50GAUI_1_LAUI_1_50GBASE_CR_KR:
		return (50ULL * GBITS);
	case MLXCX_EXTPROTO_CAUI_4_100GBASE_CR4_KR4:
	case MLXCX_EXTPROTO_100GAUI_2_100GBASE_CR2_KR2:
	case MLXCX_EXTPROTO_100GAUI_1_100GBASE_CR_KR:
		return (100ULL * GBITS);
	case MLXCX_EXTPROTO_200GAUI_4_200GBASE_CR4_KR4:
	case MLXCX_EXTPROTO_200GAUI_2_200GBASE_CR2_KR2:
		return (200ULL * GBITS);
	case MLXCX_EXTPROTO_400GAUI_8_400GBASE_CR8:
	case MLXCX_EXTPROTO_400GAUI_4_400GBASE_CR4:
		return (400ULL * GBITS);
	default:
		/*
		 * There ARE legitimate single-bit values we don't support,
		 * and should just return 0 immediately. We will ASSERT()
		 * that it's a single-bit value, however, for reasons detailed
		 * in the prior `default` case.
		 */
		ASSERT0((uint32_t)ext_proto & ((uint32_t)ext_proto - 1U));
		break;
	}

	return (0);
}

static link_fec_t
mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec)
{
	if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0)
		return (LINK_FEC_NONE);

	if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0)
		return (LINK_FEC_BASE_R);

	if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 |
	    MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 |
	    MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0)
		return (LINK_FEC_RS);

	return (LINK_FEC_NONE);
}

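/*
 * Convert a MAC-layer FEC selection (link_fec_t) into the corresponding
 * PPLM FEC capability bits. Requests naming more than one FEC option are
 * rejected.
 */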
static boolean_t
mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp)
{
	mlxcx_pplm_fec_caps_t pplm_fec = 0;

	if ((fec & LINK_FEC_AUTO) != 0) {
		pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO;
		fec &= ~LINK_FEC_AUTO;
	} else if ((fec & LINK_FEC_NONE) != 0) {
		pplm_fec = MLXCX_PPLM_FEC_CAP_NONE;
		fec &= ~LINK_FEC_NONE;
	} else if ((fec & LINK_FEC_RS) != 0) {
		pplm_fec |= MLXCX_PPLM_FEC_CAP_RS;
		fec &= ~LINK_FEC_RS;
	} else if ((fec & LINK_FEC_BASE_R) != 0) {
		pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE;
		fec &= ~LINK_FEC_BASE_R;
	}

	/*
	 * Only one fec option is allowed.
	 */
	if (fec != 0)
		return (B_FALSE);

	*pfecp = pplm_fec;

	return (B_TRUE);
}

static int
mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
    uint64_t *val)
{
	int ret = 0;
	boolean_t ok;
	mlxcx_register_data_t data;
	mlxcx_ppcnt_rfc_2863_t *st;

	ASSERT(mutex_owned(&port->mlp_mtx));

	bzero(&data, sizeof (data));
	data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1;
	data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_RFC_2863;
	data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR;

	ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
	    MLXCX_REG_PPCNT, &data);
	if (!ok)
		return (EIO);
	st = &data.mlrd_ppcnt.mlrd_ppcnt_rfc_2863;

	switch (stat) {
	case MAC_STAT_RBYTES:
		*val = from_be64(st->mlppc_rfc_2863_in_octets);
		break;
	case MAC_STAT_MULTIRCV:
		*val = from_be64(st->mlppc_rfc_2863_in_mcast_pkts);
		break;
	case MAC_STAT_BRDCSTRCV:
		*val = from_be64(st->mlppc_rfc_2863_in_bcast_pkts);
		break;
	case MAC_STAT_MULTIXMT:
		*val = from_be64(st->mlppc_rfc_2863_out_mcast_pkts);
		break;
	case MAC_STAT_BRDCSTXMT:
		*val = from_be64(st->mlppc_rfc_2863_out_bcast_pkts);
		break;
	case MAC_STAT_IERRORS:
		*val = from_be64(st->mlppc_rfc_2863_in_errors);
		break;
	case MAC_STAT_UNKNOWNS:
		*val = from_be64(st->mlppc_rfc_2863_in_unknown_protos);
		break;
	case MAC_STAT_OERRORS:
		*val = from_be64(st->mlppc_rfc_2863_out_errors);
		break;
	case MAC_STAT_OBYTES:
		*val = from_be64(st->mlppc_rfc_2863_out_octets);
		break;
	default:
		ret = ENOTSUP;
	}

	return (ret);
}

static int
mlxcx_mac_stat_ieee_802_3(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat,
    uint64_t *val)
{
	int ret = 0;
	boolean_t ok;
	mlxcx_register_data_t data;
	mlxcx_ppcnt_ieee_802_3_t *st;

	ASSERT(mutex_owned(&port->mlp_mtx));

	bzero(&data, sizeof (data));
	data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1;
	data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_IEEE_802_3;
	data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR;

	ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
	    MLXCX_REG_PPCNT, &data);
	if (!ok)
		return (EIO);
	st = &data.mlrd_ppcnt.mlrd_ppcnt_ieee_802_3;

	switch (stat) {
	case MAC_STAT_IPACKETS:
		*val = from_be64(st->mlppc_ieee_802_3_frames_rx);
		break;
	case MAC_STAT_OPACKETS:
		*val = from_be64(st->mlppc_ieee_802_3_frames_tx);
		break;
	case ETHER_STAT_ALIGN_ERRORS:
		*val = from_be64(st->mlppc_ieee_802_3_align_err);
		break;
	case ETHER_STAT_FCS_ERRORS:
		*val = from_be64(st->mlppc_ieee_802_3_fcs_err);
		break;
	case ETHER_STAT_TOOLONG_ERRORS:
		*val = from_be64(st->mlppc_ieee_802_3_frame_too_long_err);
		break;
	default:
		ret = ENOTSUP;
	}

	return (ret);
}

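/*
 * MAC statistics entry point (mc_getstat). Counters kept by the hardware
 * are fetched from the PPCNT register via the RFC 2863 and IEEE 802.3
 * helpers above; speed and duplex are derived from the current protocol
 * bits.
 */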
static int
mlxcx_mac_stat(void *arg, uint_t stat, uint64_t *val)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_port_t *port = &mlxp->mlx_ports[0];
	int ret = 0;

	mutex_enter(&port->mlp_mtx);

	switch (stat) {
	case MAC_STAT_IFSPEED:
		*val = mlxcx_speed_to_bits(port->mlp_oper_proto,
		    port->mlp_ext_oper_proto);
		break;
	case ETHER_STAT_LINK_DUPLEX:
		*val = LINK_DUPLEX_FULL;
		break;
	case MAC_STAT_RBYTES:
	case MAC_STAT_MULTIRCV:
	case MAC_STAT_BRDCSTRCV:
	case MAC_STAT_MULTIXMT:
	case MAC_STAT_BRDCSTXMT:
	case MAC_STAT_IERRORS:
	case MAC_STAT_UNKNOWNS:
	case MAC_STAT_OERRORS:
	case MAC_STAT_OBYTES:
		ret = mlxcx_mac_stat_rfc_2863(mlxp, port, stat, val);
		break;
	case MAC_STAT_IPACKETS:
	case MAC_STAT_OPACKETS:
	case ETHER_STAT_ALIGN_ERRORS:
	case ETHER_STAT_FCS_ERRORS:
	case ETHER_STAT_TOOLONG_ERRORS:
		ret = mlxcx_mac_stat_ieee_802_3(mlxp, port, stat, val);
		break;
	case MAC_STAT_NORCVBUF:
		*val = port->mlp_stats.mlps_rx_drops;
		break;
	default:
		ret = ENOTSUP;
	}

	mutex_exit(&port->mlp_mtx);

	return (ret);
}

static int
mlxcx_mac_led_set(void *arg, mac_led_mode_t mode, uint_t flags)
{
	mlxcx_t *mlxp = arg;
	mlxcx_port_t *port = &mlxp->mlx_ports[0];
	int ret = 0;

	if (flags != 0) {
		return (EINVAL);
	}

	mutex_enter(&port->mlp_mtx);

	switch (mode) {
	case MAC_LED_DEFAULT:
	case MAC_LED_OFF:
		if (!mlxcx_cmd_set_port_led(mlxp, port, 0)) {
			ret = EIO;
			break;
		}
		break;
	case MAC_LED_IDENT:
		if (!mlxcx_cmd_set_port_led(mlxp, port, UINT16_MAX)) {
			ret = EIO;
			break;
		}
		break;
	default:
		ret = ENOTSUP;
	}

	mutex_exit(&port->mlp_mtx);

	return (ret);
}

static int
mlxcx_mac_txr_info(void *arg, uint_t id, mac_transceiver_info_t *infop)
{
	mlxcx_t *mlxp = arg;
	mlxcx_module_status_t st;

	if (!mlxcx_cmd_query_module_status(mlxp, id, &st, NULL))
		return (EIO);

	if (st != MLXCX_MODULE_UNPLUGGED)
		mac_transceiver_info_set_present(infop, B_TRUE);

	if (st == MLXCX_MODULE_PLUGGED)
		mac_transceiver_info_set_usable(infop, B_TRUE);

	return (0);
}

static int
mlxcx_mac_txr_read(void *arg, uint_t id, uint_t page, void *vbuf,
    size_t nbytes, off_t offset, size_t *nread)
{
	mlxcx_t *mlxp = arg;
	mlxcx_register_data_t data;
	uint8_t *buf = vbuf;
	boolean_t ok;
	size_t take, done = 0;
	uint8_t i2c_addr;

	if (id != 0 || vbuf == NULL || nbytes == 0 || nread == NULL)
		return (EINVAL);

	if (nbytes > 256 || offset >= 256 || (offset + nbytes > 256))
		return (EINVAL);

	/*
	 * The PRM is really not very clear about any of this, but it seems
	 * that the i2c_device_addr field in MCIA is the SFP+ spec "page"
	 * number shifted right by 1 bit. They're written in the SFF spec
	 * like "1010000X" so Mellanox just dropped the X.
	 *
	 * This means that if we want page 0xA0, we put 0x50 in the
	 * i2c_device_addr field.
	 *
	 * The "page_number" field in MCIA means something else. Don't ask me
	 * what. FreeBSD leaves it as zero, so we will too!
	 */
	i2c_addr = page >> 1;

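	/*
	 * The MCIA register can only carry a limited chunk of data per
	 * access, so read in pieces, advancing the byte offset each time.
	 */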
	while (done < nbytes) {
		take = nbytes - done;
		if (take > sizeof (data.mlrd_mcia.mlrd_mcia_data))
			take = sizeof (data.mlrd_mcia.mlrd_mcia_data);

		bzero(&data, sizeof (data));
		ASSERT3U(id, <=, 0xff);
		data.mlrd_mcia.mlrd_mcia_module = (uint8_t)id;
		data.mlrd_mcia.mlrd_mcia_i2c_device_addr = i2c_addr;
		data.mlrd_mcia.mlrd_mcia_device_addr = to_be16(offset);
		data.mlrd_mcia.mlrd_mcia_size = to_be16(take);

		ok = mlxcx_cmd_access_register(mlxp,
		    MLXCX_CMD_ACCESS_REGISTER_READ, MLXCX_REG_MCIA, &data);
		if (!ok) {
			*nread = 0;
			return (EIO);
		}

		if (data.mlrd_mcia.mlrd_mcia_status != MLXCX_MCIA_STATUS_OK) {
			*nread = 0;
			return (EIO);
		}

		bcopy(data.mlrd_mcia.mlrd_mcia_data, &buf[done], take);

		done += take;
		offset += take;
	}
	*nread = done;
	return (0);
}

static int
mlxcx_mac_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val)
{
	mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
	(void) wq;

	/*
	 * We should add support for using hw flow counters and such to
	 * get per-ring statistics. Not done yet though!
	 */

	switch (stat) {
	default:
		*val = 0;
		return (ENOTSUP);
	}

	return (0);
}

static int
mlxcx_mac_start(void *arg)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	(void) mlxp;
	return (0);
}

static void
mlxcx_mac_stop(void *arg)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	(void) mlxp;
}

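/*
 * Transmit entry point for a single TX ring (SQ). The L2 header is copied
 * out of the mblk chain and handed to the SQ as inline data (the SQ inline
 * mode is at most MLXCX_ETH_INLINE_L2); the remainder is bound or copied
 * into DMA buffers. Returning the mblk tells MAC the ring is blocked and
 * the packet should be retried later.
 */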
static mblk_t *
mlxcx_mac_ring_tx(void *arg, mblk_t *mp)
{
	mlxcx_work_queue_t *sq = (mlxcx_work_queue_t *)arg;
	mlxcx_t *mlxp = sq->mlwq_mlx;
	mlxcx_completion_queue_t *cq;
	mlxcx_buffer_t *b;
	mac_header_info_t mhi;
	mblk_t *kmp, *nmp;
	uint8_t inline_hdrs[MLXCX_MAX_INLINE_HEADERLEN];
	size_t inline_hdrlen, rem, off;
	uint32_t chkflags = 0;
	boolean_t ok;
	size_t take = 0;
	uint_t bcount;

	VERIFY(mp->b_next == NULL);

	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &chkflags);

	if (mac_vlan_header_info(mlxp->mlx_mac_hdl, mp, &mhi) != 0) {
		/*
		 * We got given a frame without a valid L2 header on it. We
		 * can't really transmit that (mlx parts don't like it), so
		 * we will just drop it on the floor.
		 */
		freemsg(mp);
		return (NULL);
	}

	inline_hdrlen = rem = mhi.mhi_hdrsize;

	kmp = mp;
	off = 0;
	while (rem > 0) {
		const ptrdiff_t sz = MBLKL(kmp);
		ASSERT3S(sz, >=, 0);
		ASSERT3U(sz, <=, SIZE_MAX);
		take = sz;
		if (take > rem)
			take = rem;
		bcopy(kmp->b_rptr, inline_hdrs + off, take);
		rem -= take;
		off += take;
		if (take == sz) {
			take = 0;
			kmp = kmp->b_cont;
		}
	}

	bcount = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b);
	if (bcount == 0) {
		atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
		return (mp);
	}

	mutex_enter(&sq->mlwq_mtx);
	VERIFY3U(sq->mlwq_inline_mode, <=, MLXCX_ETH_INLINE_L2);
	cq = sq->mlwq_cq;

	/*
	 * state is a single int, so read-only access without the CQ lock
	 * should be fine.
	 */
	if (cq->mlcq_state & MLXCX_CQ_TEARDOWN) {
		mutex_exit(&sq->mlwq_mtx);
		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
		return (NULL);
	}

	if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) !=
	    MLXCX_WQ_STARTED) {
		mutex_exit(&sq->mlwq_mtx);
		mlxcx_buf_return_chain(mlxp, b, B_FALSE);
		return (NULL);
	}

	/*
	 * If the completion queue buffer count is already at or above
	 * the high water mark, or the addition of this new chain will
	 * exceed the CQ ring size, then indicate we are blocked.
	 */
	if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm ||
	    (cq->mlcq_bufcnt + bcount) > cq->mlcq_nents) {
		atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
		goto blocked;
	}

	if (sq->mlwq_wqebb_used >= sq->mlwq_bufhwm) {
		atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
		goto blocked;
	}

	ok = mlxcx_sq_add_buffer(mlxp, sq, inline_hdrs, inline_hdrlen,
	    chkflags, b);
	if (!ok) {
		atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC);
		atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC);
		goto blocked;
	}

	/*
	 * Now that we've successfully enqueued the rest of the packet,
	 * free any mblks that we cut off while inlining headers.
	 */
	for (; mp != kmp; mp = nmp) {
		nmp = mp->b_cont;
		freeb(mp);
	}

	mutex_exit(&sq->mlwq_mtx);

	return (NULL);

blocked:
	mutex_exit(&sq->mlwq_mtx);
	mlxcx_buf_return_chain(mlxp, b, B_TRUE);
	return (mp);
}

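/*
 * Promiscuous mode is implemented with flow table entries rather than a
 * device-wide flag: a catch-all entry in the port's root RX flow table,
 * plus one per RX group so that traffic which matches a MAC filter but not
 * a VLAN filter is still delivered.
 */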
static int
mlxcx_mac_setpromisc(void *arg, boolean_t on)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_port_t *port = &mlxp->mlx_ports[0];
	mlxcx_flow_group_t *fg;
	mlxcx_flow_entry_t *fe;
	mlxcx_flow_table_t *ft;
	mlxcx_ring_group_t *g;
	int ret = 0;
	uint_t idx;

	mutex_enter(&port->mlp_mtx);

	/*
	 * First, do the top-level flow entry on the root flow table for
	 * the port. This catches all traffic that doesn't match any MAC
	 * filters.
	 */
	ft = port->mlp_rx_flow;
	mutex_enter(&ft->mlft_mtx);
	fg = port->mlp_promisc;
	fe = list_head(&fg->mlfg_entries);
	if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
		if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
			ret = EIO;
		}
	} else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
		if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
			ret = EIO;
		}
	}
	mutex_exit(&ft->mlft_mtx);

	/*
	 * If we failed to change the top-level entry, don't bother with
	 * trying the per-group ones.
	 */
	if (ret != 0) {
		mutex_exit(&port->mlp_mtx);
		return (ret);
	}

	/*
	 * Then, do the per-rx-group flow entries which catch traffic that
	 * matched a MAC filter but failed to match a VLAN filter.
	 */
	for (idx = 0; idx < mlxp->mlx_rx_ngroups; ++idx) {
		g = &mlxp->mlx_rx_groups[idx];

		mutex_enter(&g->mlg_mtx);

		ft = g->mlg_rx_vlan_ft;
		mutex_enter(&ft->mlft_mtx);

		fg = g->mlg_rx_vlan_promisc_fg;
		fe = list_head(&fg->mlfg_entries);
		if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
			if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
				ret = EIO;
			}
		} else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) {
			if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
				ret = EIO;
			}
		}

		mutex_exit(&ft->mlft_mtx);
		mutex_exit(&g->mlg_mtx);
	}

	mutex_exit(&port->mlp_mtx);
	return (ret);
}

static int
mlxcx_mac_multicast(void *arg, boolean_t add, const uint8_t *addr)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_port_t *port = &mlxp->mlx_ports[0];
	mlxcx_ring_group_t *g = &mlxp->mlx_rx_groups[0];
	int ret = 0;

	mutex_enter(&port->mlp_mtx);
	mutex_enter(&g->mlg_mtx);
	if (add) {
		if (!mlxcx_add_umcast_entry(mlxp, port, g, addr)) {
			ret = EIO;
		}
	} else {
		if (!mlxcx_remove_umcast_entry(mlxp, port, g, addr)) {
			ret = EIO;
		}
	}
	mutex_exit(&g->mlg_mtx);
	mutex_exit(&port->mlp_mtx);
	return (ret);
}

static int
mlxcx_group_add_mac(void *arg, const uint8_t *mac_addr)
{
	mlxcx_ring_group_t *g = arg;
	mlxcx_t *mlxp = g->mlg_mlx;
	mlxcx_port_t *port = g->mlg_port;
	int ret = 0;

	mutex_enter(&port->mlp_mtx);
	mutex_enter(&g->mlg_mtx);
	if (!mlxcx_add_umcast_entry(mlxp, port, g, mac_addr)) {
		ret = EIO;
	}
	mutex_exit(&g->mlg_mtx);
	mutex_exit(&port->mlp_mtx);

	return (ret);
}

static int
mlxcx_group_add_vlan(mac_group_driver_t gh, uint16_t vid)
{
	mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
	mlxcx_t *mlxp = g->mlg_mlx;
	int ret = 0;
	boolean_t tagged = B_TRUE;

	if (vid == MAC_VLAN_UNTAGGED) {
		vid = 0;
		tagged = B_FALSE;
	}

	mutex_enter(&g->mlg_mtx);
	if (!mlxcx_add_vlan_entry(mlxp, g, tagged, vid)) {
		ret = EIO;
	}
	mutex_exit(&g->mlg_mtx);

	return (ret);
}

static int
mlxcx_group_remove_vlan(mac_group_driver_t gh, uint16_t vid)
{
	mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
	mlxcx_t *mlxp = g->mlg_mlx;
	int ret = 0;
	boolean_t tagged = B_TRUE;

	if (vid == MAC_VLAN_UNTAGGED) {
		vid = 0;
		tagged = B_FALSE;
	}

	mutex_enter(&g->mlg_mtx);
	if (!mlxcx_remove_vlan_entry(mlxp, g, tagged, vid)) {
		ret = EIO;
	}
	mutex_exit(&g->mlg_mtx);

	return (ret);
}

static int
mlxcx_group_remove_mac(void *arg, const uint8_t *mac_addr)
{
	mlxcx_ring_group_t *g = arg;
	mlxcx_t *mlxp = g->mlg_mlx;
	mlxcx_port_t *port = g->mlg_port;
	int ret = 0;

	mutex_enter(&port->mlp_mtx);
	mutex_enter(&g->mlg_mtx);
	if (!mlxcx_remove_umcast_entry(mlxp, port, g, mac_addr)) {
		ret = EIO;
	}
	mutex_exit(&g->mlg_mtx);
	mutex_exit(&port->mlp_mtx);

	return (ret);
}

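/*
 * mac(9E) ring start entry point: bring up the underlying SQ or RQ and
 * record the MAC ring generation number on the completion queue.
 */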
static int
mlxcx_mac_ring_start(mac_ring_driver_t rh, uint64_t gen_num)
{
	mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
	mlxcx_completion_queue_t *cq = wq->mlwq_cq;
	mlxcx_ring_group_t *g = wq->mlwq_group;
	mlxcx_t *mlxp = wq->mlwq_mlx;

	ASSERT(cq != NULL);
	ASSERT(g != NULL);

	ASSERT(wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ ||
	    wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ);
	if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
	    !mlxcx_tx_ring_start(mlxp, g, wq))
		return (EIO);
	if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
	    !mlxcx_rx_ring_start(mlxp, g, wq))
		return (EIO);

	mutex_enter(&cq->mlcq_mtx);
	cq->mlcq_mac_gen = gen_num;
	mutex_exit(&cq->mlcq_mtx);

	return (0);
}

static void
mlxcx_mac_ring_stop(mac_ring_driver_t rh)
{
	mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh;
	mlxcx_completion_queue_t *cq = wq->mlwq_cq;
	mlxcx_t *mlxp = wq->mlwq_mlx;
	mlxcx_buf_shard_t *s;
	mlxcx_buffer_t *buf;

	/*
	 * To prevent deadlocks and sleeping whilst holding either the
	 * CQ mutex or WQ mutex, we split the stop processing into two
	 * parts.
	 *
	 * With the CQ and WQ mutexes held, the appropriate WQ is stopped.
	 * The Q in the HCA is set to Reset state and flagged as no
	 * longer started. Atomically with this WQ state change, the buffer
	 * shards are flagged as draining.
	 *
	 * Now, any requests for buffers and attempts to submit messages
	 * will fail, and once we're in this state it is safe to relinquish
	 * the CQ and WQ mutexes. This allows us to complete the ring stop
	 * by waiting for the buffer lists, with the exception of
	 * the loaned list, to drain. Buffers on the loaned list are
	 * not under our control; we will get them back when the mblk tied
	 * to the buffer is freed.
	 */

	mutex_enter(&cq->mlcq_mtx);
	mutex_enter(&wq->mlwq_mtx);

	if (wq->mlwq_state & MLXCX_WQ_STARTED) {
		if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ &&
		    !mlxcx_cmd_stop_rq(mlxp, wq)) {
			mutex_exit(&wq->mlwq_mtx);
			mutex_exit(&cq->mlcq_mtx);
			return;
		}
		if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ &&
		    !mlxcx_cmd_stop_sq(mlxp, wq)) {
			mutex_exit(&wq->mlwq_mtx);
			mutex_exit(&cq->mlcq_mtx);
			return;
		}
	}
	ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED);

	mlxcx_shard_draining(wq->mlwq_bufs);
	if (wq->mlwq_foreign_bufs != NULL)
		mlxcx_shard_draining(wq->mlwq_foreign_bufs);

	if (wq->mlwq_state & MLXCX_WQ_BUFFERS) {
		list_t cq_buffers;

		/*
		 * Take the buffers away from the CQ. If the CQ is being
		 * processed and the WQ has been stopped, a completion
		 * which does not match to a buffer will be ignored.
		 */
		list_create(&cq_buffers, sizeof (mlxcx_buffer_t),
		    offsetof(mlxcx_buffer_t, mlb_cq_entry));

		list_move_tail(&cq_buffers, &cq->mlcq_buffers);

		mutex_enter(&cq->mlcq_bufbmtx);
		list_move_tail(&cq_buffers, &cq->mlcq_buffers_b);
		mutex_exit(&cq->mlcq_bufbmtx);

		cq->mlcq_bufcnt = 0;

		mutex_exit(&wq->mlwq_mtx);
		mutex_exit(&cq->mlcq_mtx);

		/* Return any outstanding buffers to the free pool. */
		while ((buf = list_remove_head(&cq_buffers)) != NULL) {
			mlxcx_buf_return_chain(mlxp, buf, B_FALSE);
		}
		list_destroy(&cq_buffers);

		s = wq->mlwq_bufs;
		mutex_enter(&s->mlbs_mtx);
		while (!list_is_empty(&s->mlbs_busy))
			cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
		while ((buf = list_head(&s->mlbs_free)) != NULL) {
			mlxcx_buf_destroy(mlxp, buf);
		}
		mutex_exit(&s->mlbs_mtx);

		s = wq->mlwq_foreign_bufs;
		if (s != NULL) {
			mutex_enter(&s->mlbs_mtx);
			while (!list_is_empty(&s->mlbs_busy))
				cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
			while ((buf = list_head(&s->mlbs_free)) != NULL) {
				mlxcx_buf_destroy(mlxp, buf);
			}
			mutex_exit(&s->mlbs_mtx);
		}

		mutex_enter(&wq->mlwq_mtx);
		wq->mlwq_state &= ~MLXCX_WQ_BUFFERS;
		mutex_exit(&wq->mlwq_mtx);
	} else {
		mutex_exit(&wq->mlwq_mtx);
		mutex_exit(&cq->mlcq_mtx);
	}
}

static int
mlxcx_mac_group_start(mac_group_driver_t gh)
{
	mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh;
	mlxcx_t *mlxp = g->mlg_mlx;

	VERIFY3S(g->mlg_type, ==, MLXCX_GROUP_RX);
	ASSERT(mlxp != NULL);

	if (g->mlg_state & MLXCX_GROUP_RUNNING)
		return (0);

	if (!mlxcx_rx_group_start(mlxp, g))
		return (EIO);

	return (0);
}

static void
mlxcx_mac_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
    const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_ring_group_t *g;
	mlxcx_work_queue_t *wq;
	mac_intr_t *mintr = &infop->mri_intr;

	if (rtype != MAC_RING_TYPE_TX)
		return;
	ASSERT3S(group_index, ==, -1);

	g = &mlxp->mlx_tx_groups[0];
	ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
	mutex_enter(&g->mlg_mtx);

	ASSERT3S(ring_index, >=, 0);
	ASSERT3S(ring_index, <, g->mlg_nwqs);

	wq = &g->mlg_wqs[ring_index];

	wq->mlwq_cq->mlcq_mac_hdl = rh;

	infop->mri_driver = (mac_ring_driver_t)wq;
	infop->mri_start = mlxcx_mac_ring_start;
	infop->mri_stop = mlxcx_mac_ring_stop;
	infop->mri_tx = mlxcx_mac_ring_tx;
	infop->mri_stat = mlxcx_mac_ring_stat;

	mintr->mi_ddi_handle = mlxp->mlx_intr_handles[
	    wq->mlwq_cq->mlcq_eq->mleq_intr_index];

	mutex_exit(&g->mlg_mtx);
}

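/*
 * Ring interrupt enable/disable. MAC switches a ring into polling mode by
 * disabling its interrupt; we track this with MLXCX_CQ_POLLING and only
 * re-arm the CQ when interrupts are re-enabled.
 */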
static int
mlxcx_mac_ring_intr_enable(mac_intr_handle_t intrh)
{
	mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh;
	mlxcx_t *mlxp = cq->mlcq_mlx;

	/*
	 * We are going to call mlxcx_arm_cq() here, so we take the arm lock
	 * as well as the CQ one to make sure we don't race against
	 * mlxcx_intr_n().
	 */
	mutex_enter(&cq->mlcq_arm_mtx);
	mutex_enter(&cq->mlcq_mtx);
	if (cq->mlcq_state & MLXCX_CQ_POLLING) {
		atomic_and_uint(&cq->mlcq_state, ~MLXCX_CQ_POLLING);
		if (!(cq->mlcq_state & MLXCX_CQ_ARMED))
			mlxcx_arm_cq(mlxp, cq);
	}
	mutex_exit(&cq->mlcq_mtx);
	mutex_exit(&cq->mlcq_arm_mtx);

	return (0);
}

static int
mlxcx_mac_ring_intr_disable(mac_intr_handle_t intrh)
{
	mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh;

	mutex_enter(&cq->mlcq_mtx);
	atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_POLLING);
	mutex_exit(&cq->mlcq_mtx);

	return (0);
}

static mblk_t *
mlxcx_mac_ring_rx_poll(void *arg, int poll_bytes)
{
	mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)arg;
	mlxcx_completion_queue_t *cq = wq->mlwq_cq;
	mlxcx_t *mlxp = wq->mlwq_mlx;
	mblk_t *mp;

	ASSERT(cq != NULL);
	ASSERT3S(poll_bytes, >, 0);
	if (poll_bytes == 0)
		return (NULL);

	mutex_enter(&cq->mlcq_mtx);
	mp = mlxcx_rx_poll(mlxp, cq, poll_bytes);
	mutex_exit(&cq->mlcq_mtx);

	return (mp);
}

static void
mlxcx_mac_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index,
    const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_ring_group_t *g;
	mlxcx_work_queue_t *wq;
	mac_intr_t *mintr = &infop->mri_intr;

	if (rtype != MAC_RING_TYPE_RX)
		return;
	ASSERT3S(group_index, >=, 0);
	ASSERT3S(group_index, <, mlxp->mlx_rx_ngroups);

	g = &mlxp->mlx_rx_groups[group_index];
	ASSERT(g->mlg_state & MLXCX_GROUP_INIT);
	mutex_enter(&g->mlg_mtx);

	ASSERT3S(ring_index, >=, 0);
	ASSERT3S(ring_index, <, g->mlg_nwqs);

	ASSERT(g->mlg_state & MLXCX_GROUP_WQS);
	wq = &g->mlg_wqs[ring_index];

	wq->mlwq_cq->mlcq_mac_hdl = rh;

	infop->mri_driver = (mac_ring_driver_t)wq;
	infop->mri_start = mlxcx_mac_ring_start;
	infop->mri_stop = mlxcx_mac_ring_stop;
	infop->mri_poll = mlxcx_mac_ring_rx_poll;
	infop->mri_stat = mlxcx_mac_ring_stat;

	mintr->mi_handle = (mac_intr_handle_t)wq->mlwq_cq;
	mintr->mi_enable = mlxcx_mac_ring_intr_enable;
	mintr->mi_disable = mlxcx_mac_ring_intr_disable;

	mintr->mi_ddi_handle = mlxp->mlx_intr_handles[
	    wq->mlwq_cq->mlcq_eq->mleq_intr_index];

	mutex_exit(&g->mlg_mtx);
}

static void
mlxcx_mac_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index,
    mac_group_info_t *infop, mac_group_handle_t gh)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_ring_group_t *g;

	if (rtype != MAC_RING_TYPE_RX)
		return;

	ASSERT3S(index, >=, 0);
	ASSERT3S(index, <, mlxp->mlx_rx_ngroups);
	g = &mlxp->mlx_rx_groups[index];
	ASSERT(g->mlg_state & MLXCX_GROUP_INIT);

	g->mlg_mac_hdl = gh;

	infop->mgi_driver = (mac_group_driver_t)g;
	infop->mgi_start = mlxcx_mac_group_start;
	infop->mgi_stop = NULL;
	infop->mgi_addmac = mlxcx_group_add_mac;
	infop->mgi_remmac = mlxcx_group_remove_mac;
	infop->mgi_addvlan = mlxcx_group_add_vlan;
	infop->mgi_remvlan = mlxcx_group_remove_vlan;

	infop->mgi_count = g->mlg_nwqs;
}

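/*
 * Report capabilities to MAC: the static ring/group layout, full hardware
 * checksum offload (when the HCA advertises it), LED control and
 * transceiver module access.
 */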
static boolean_t
mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mac_capab_rings_t *cap_rings;
	mac_capab_led_t *cap_leds;
	mac_capab_transceiver_t *cap_txr;
	uint_t i, n = 0;

	switch (cap) {

	case MAC_CAPAB_RINGS:
		cap_rings = cap_data;
		cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
		switch (cap_rings->mr_type) {
		case MAC_RING_TYPE_TX:
			cap_rings->mr_gnum = 0;
			cap_rings->mr_rnum = mlxp->mlx_tx_groups[0].mlg_nwqs;
			cap_rings->mr_rget = mlxcx_mac_fill_tx_ring;
			cap_rings->mr_gget = NULL;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
			break;
		case MAC_RING_TYPE_RX:
			cap_rings->mr_gnum = mlxp->mlx_rx_ngroups;
			for (i = 0; i < mlxp->mlx_rx_ngroups; ++i)
				n += mlxp->mlx_rx_groups[i].mlg_nwqs;
			cap_rings->mr_rnum = n;
			cap_rings->mr_rget = mlxcx_mac_fill_rx_ring;
			cap_rings->mr_gget = mlxcx_mac_fill_rx_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
			break;
		default:
			return (B_FALSE);
		}
		break;

	case MAC_CAPAB_HCKSUM:
		if (mlxp->mlx_caps->mlc_checksum) {
			*(uint32_t *)cap_data = HCKSUM_INET_FULL_V4 |
			    HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM;
		}
		break;

	case MAC_CAPAB_LED:
		cap_leds = cap_data;

		cap_leds->mcl_flags = 0;
		cap_leds->mcl_modes = MAC_LED_DEFAULT | MAC_LED_OFF |
		    MAC_LED_IDENT;
		cap_leds->mcl_set = mlxcx_mac_led_set;
		break;

	case MAC_CAPAB_TRANSCEIVER:
		cap_txr = cap_data;

		cap_txr->mct_flags = 0;
		cap_txr->mct_ntransceivers = 1;
		cap_txr->mct_info = mlxcx_mac_txr_info;
		cap_txr->mct_read = mlxcx_mac_txr_read;
		break;

	default:
		return (B_FALSE);
	}

	return (B_TRUE);
}

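/*
 * Property metadata. Speed, duplex and the per-speed advertisement
 * properties are read-only; MTU and the enabled FEC setting are the only
 * writable properties.
 */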
static void
mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    mac_prop_info_handle_t prh)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_port_t *port = &mlxp->mlx_ports[0];

	mutex_enter(&port->mlp_mtx);

	switch (pr_num) {
	case MAC_PROP_DUPLEX:
	case MAC_PROP_SPEED:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		break;
	case MAC_PROP_MTU:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
		mac_prop_info_set_range_uint32(prh, MLXCX_MTU_OFFSET,
		    port->mlp_max_mtu);
		mac_prop_info_set_default_uint32(prh,
		    port->mlp_mtu - MLXCX_MTU_OFFSET);
		break;
	case MAC_PROP_AUTONEG:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh, 1);
		break;
	case MAC_PROP_ADV_FEC_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
		break;
	case MAC_PROP_EN_FEC_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW);
		mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO);
		break;
	case MAC_PROP_ADV_400GFDX_CAP:
	case MAC_PROP_EN_400GFDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_400G) != 0);
		break;
	case MAC_PROP_ADV_200GFDX_CAP:
	case MAC_PROP_EN_200GFDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_200G) != 0);
		break;
	case MAC_PROP_ADV_100GFDX_CAP:
	case MAC_PROP_EN_100GFDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    ((port->mlp_oper_proto & MLXCX_PROTO_100G) != 0 ||
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_100G)) != 0);
		break;
	case MAC_PROP_ADV_50GFDX_CAP:
	case MAC_PROP_EN_50GFDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    ((port->mlp_oper_proto & MLXCX_PROTO_50G) != 0 ||
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_50G)) != 0);
		break;
	case MAC_PROP_ADV_40GFDX_CAP:
	case MAC_PROP_EN_40GFDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    ((port->mlp_oper_proto & MLXCX_PROTO_40G) != 0 ||
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_40G)) != 0);
		break;
	case MAC_PROP_ADV_25GFDX_CAP:
	case MAC_PROP_EN_25GFDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    ((port->mlp_oper_proto & MLXCX_PROTO_25G) != 0 ||
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_25G)) != 0);
		break;
	case MAC_PROP_ADV_10GFDX_CAP:
	case MAC_PROP_EN_10GFDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    ((port->mlp_oper_proto & MLXCX_PROTO_10G) != 0 ||
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_10G)) != 0);
		break;
	case MAC_PROP_ADV_1000FDX_CAP:
	case MAC_PROP_EN_1000FDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    ((port->mlp_oper_proto & MLXCX_PROTO_1G) != 0 ||
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_1G)) != 0);
		break;
	case MAC_PROP_ADV_100FDX_CAP:
	case MAC_PROP_EN_100FDX_CAP:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_default_uint8(prh,
		    ((port->mlp_oper_proto & MLXCX_PROTO_100M) != 0 ||
		    (port->mlp_ext_oper_proto & MLXCX_EXTPROTO_100M)) != 0);
		break;
	default:
		break;
	}

	mutex_exit(&port->mlp_mtx);
}

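/*
 * Property set entry point. Changing the MTU is only permitted while no
 * RX/TX buffers are allocated; changing the enabled FEC forces the link
 * down and back up so the new setting takes effect during retraining.
 */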
static int
mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_port_t *port = &mlxp->mlx_ports[0];
	int ret = 0;
	uint32_t new_mtu, new_hw_mtu, old_mtu;
	mlxcx_buf_shard_t *sh;
	boolean_t allocd = B_FALSE;
	boolean_t relink = B_FALSE;
	link_fec_t fec;
	mlxcx_pplm_fec_caps_t cap_fec;

	mutex_enter(&port->mlp_mtx);

	switch (pr_num) {
	case MAC_PROP_MTU:
		bcopy(pr_val, &new_mtu, sizeof (new_mtu));
		new_hw_mtu = new_mtu + MLXCX_MTU_OFFSET;
		if (new_hw_mtu == port->mlp_mtu)
			break;
		if (new_hw_mtu > port->mlp_max_mtu) {
			ret = EINVAL;
			break;
		}
		sh = list_head(&mlxp->mlx_buf_shards);
		for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) {
			mutex_enter(&sh->mlbs_mtx);
			if (!list_is_empty(&sh->mlbs_free) ||
			    !list_is_empty(&sh->mlbs_busy) ||
			    !list_is_empty(&sh->mlbs_loaned)) {
				allocd = B_TRUE;
				mutex_exit(&sh->mlbs_mtx);
				break;
			}
			mutex_exit(&sh->mlbs_mtx);
		}
		if (allocd) {
			ret = EBUSY;
			break;
		}
		old_mtu = port->mlp_mtu;
		ret = mac_maxsdu_update(mlxp->mlx_mac_hdl, new_mtu);
		if (ret != 0)
			break;
		port->mlp_mtu = new_hw_mtu;
		if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, port,
		    MLXCX_MODIFY_NIC_VPORT_CTX_MTU)) {
			port->mlp_mtu = old_mtu;
			(void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu);
			ret = EIO;
			break;
		}
		if (!mlxcx_cmd_set_port_mtu(mlxp, port)) {
			port->mlp_mtu = old_mtu;
			(void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu);
			ret = EIO;
			break;
		}
		break;

	case MAC_PROP_EN_FEC_CAP:
		bcopy(pr_val, &fec, sizeof (fec));
		if (!mlxcx_link_fec_cap(fec, &cap_fec)) {
			ret = EINVAL;
			break;
		}

		/*
		 * Don't change the FEC if it is already at the requested
		 * setting AND the port is up.
		 * When the port is down, always set the FEC and attempt
		 * to retrain the link.
		 */
		if (fec == port->mlp_fec_requested &&
		    fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) &&
		    port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN)
			break;

		/*
		 * The most likely cause of this failing is an invalid
		 * or unsupported FEC option.
		 */
		if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) {
			ret = EINVAL;
			break;
		}

		port->mlp_fec_requested = fec;

		/*
		 * For FEC to become effective, the link needs to go back
		 * to training and negotiation state. This happens when
		 * the link transitions from down to up, so force a relink.
		 */
		relink = B_TRUE;
		break;

	default:
		ret = ENOTSUP;
		break;
	}

	if (relink) {
		if (!mlxcx_cmd_modify_port_status(mlxp, port,
		    MLXCX_PORT_STATUS_DOWN) ||
		    !mlxcx_cmd_modify_port_status(mlxp, port,
		    MLXCX_PORT_STATUS_UP)) {
			ret = EIO;
		}
	}
	mutex_exit(&port->mlp_mtx);

	return (ret);
}

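/*
 * Property get entry point. Speed and link state are derived from the
 * current operational protocol bits; the per-speed *FDX_CAP properties
 * report whether the port's maximum supported protocol mask includes that
 * speed.
 */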
static int
mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, void *pr_val)
{
	mlxcx_t *mlxp = (mlxcx_t *)arg;
	mlxcx_port_t *port = &mlxp->mlx_ports[0];
	uint64_t speed;
	int ret = 0;

	mutex_enter(&port->mlp_mtx);

	switch (pr_num) {
	case MAC_PROP_DUPLEX:
		if (pr_valsize < sizeof (link_duplex_t)) {
			ret = EOVERFLOW;
			break;
		}
		/* connectx parts only support full duplex */
		*(link_duplex_t *)pr_val = LINK_DUPLEX_FULL;
		break;
	case MAC_PROP_SPEED:
		if (pr_valsize < sizeof (uint64_t)) {
			ret = EOVERFLOW;
			break;
		}
		speed = mlxcx_speed_to_bits(port->mlp_oper_proto,
		    port->mlp_ext_oper_proto);
		bcopy(&speed, pr_val, sizeof (speed));
		break;
	case MAC_PROP_STATUS:
		if (pr_valsize < sizeof (link_state_t)) {
			ret = EOVERFLOW;
			break;
		}
		switch (port->mlp_oper_status) {
		case MLXCX_PORT_STATUS_UP:
		case MLXCX_PORT_STATUS_UP_ONCE:
			*(link_state_t *)pr_val = LINK_STATE_UP;
			break;
		case MLXCX_PORT_STATUS_DOWN:
			*(link_state_t *)pr_val = LINK_STATE_DOWN;
			break;
		default:
			*(link_state_t *)pr_val = LINK_STATE_UNKNOWN;
		}
		break;
	case MAC_PROP_AUTONEG:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = port->mlp_autoneg;
		break;
	case MAC_PROP_ADV_FEC_CAP:
		if (pr_valsize < sizeof (link_fec_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(link_fec_t *)pr_val =
		    mlxcx_fec_to_link_fec(port->mlp_fec_active);
		break;
	case MAC_PROP_EN_FEC_CAP:
		if (pr_valsize < sizeof (link_fec_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(link_fec_t *)pr_val = port->mlp_fec_requested;
		break;
	case MAC_PROP_MTU:
		if (pr_valsize < sizeof (uint32_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint32_t *)pr_val = port->mlp_mtu - MLXCX_MTU_OFFSET;
		break;
	case MAC_PROP_ADV_400GFDX_CAP:
	case MAC_PROP_EN_400GFDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val =
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_400G) != 0;
		break;
	case MAC_PROP_ADV_200GFDX_CAP:
	case MAC_PROP_EN_200GFDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val =
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_200G) != 0;
		break;
	case MAC_PROP_ADV_100GFDX_CAP:
	case MAC_PROP_EN_100GFDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = (port->mlp_max_proto &
		    MLXCX_PROTO_100G) != 0 ||
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_100G) != 0;
		break;
	case MAC_PROP_ADV_50GFDX_CAP:
	case MAC_PROP_EN_50GFDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = (port->mlp_max_proto &
		    MLXCX_PROTO_50G) != 0 ||
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_50G) != 0;
		break;
	case MAC_PROP_ADV_40GFDX_CAP:
	case MAC_PROP_EN_40GFDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = (port->mlp_max_proto &
		    MLXCX_PROTO_40G) != 0 ||
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_40G) != 0;
		break;
	case MAC_PROP_ADV_25GFDX_CAP:
	case MAC_PROP_EN_25GFDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = (port->mlp_max_proto &
		    MLXCX_PROTO_25G) != 0 ||
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_25G) != 0;
		break;
	case MAC_PROP_ADV_10GFDX_CAP:
	case MAC_PROP_EN_10GFDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = (port->mlp_max_proto &
		    MLXCX_PROTO_10G) != 0 ||
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_10G) != 0;
		break;
	case MAC_PROP_ADV_1000FDX_CAP:
	case MAC_PROP_EN_1000FDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = (port->mlp_max_proto &
		    MLXCX_PROTO_1G) != 0 ||
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_1G) != 0;
		break;
	case MAC_PROP_ADV_100FDX_CAP:
	case MAC_PROP_EN_100FDX_CAP:
		if (pr_valsize < sizeof (uint8_t)) {
			ret = EOVERFLOW;
			break;
		}
		*(uint8_t *)pr_val = (port->mlp_max_proto &
		    MLXCX_PROTO_100M) != 0 ||
		    (port->mlp_ext_max_proto & MLXCX_EXTPROTO_100M) != 0;
		break;
	default:
		ret = ENOTSUP;
		break;
	}

	mutex_exit(&port->mlp_mtx);

	return (ret);
}

#define	MLXCX_MAC_CALLBACK_FLAGS \
	(MC_GETCAPAB | MC_GETPROP | MC_PROPINFO | MC_SETPROP)

static mac_callbacks_t mlxcx_mac_callbacks = {
	.mc_callbacks = MLXCX_MAC_CALLBACK_FLAGS,
	.mc_getstat = mlxcx_mac_stat,
	.mc_start = mlxcx_mac_start,
	.mc_stop = mlxcx_mac_stop,
	.mc_setpromisc = mlxcx_mac_setpromisc,
	.mc_multicst = mlxcx_mac_multicast,
	.mc_ioctl = NULL,
	.mc_getcapab = mlxcx_mac_getcapab,
	.mc_setprop = mlxcx_mac_setprop,
	.mc_getprop = mlxcx_mac_getprop,
	.mc_propinfo = mlxcx_mac_propinfo,
	.mc_tx = NULL,
	.mc_unicst = NULL,
};

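/*
 * Register the (single) port with the MAC framework and push an initial
 * link state update.
 */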
boolean_t
mlxcx_register_mac(mlxcx_t *mlxp)
{
	mac_register_t *mac = mac_alloc(MAC_VERSION);
	mlxcx_port_t *port;
	int ret;

	if (mac == NULL)
		return (B_FALSE);

	VERIFY3U(mlxp->mlx_nports, ==, 1);
	port = &mlxp->mlx_ports[0];

	mutex_enter(&port->mlp_mtx);

	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = mlxp;
	mac->m_dip = mlxp->mlx_dip;
	mac->m_src_addr = port->mlp_mac_address;
	mac->m_callbacks = &mlxcx_mac_callbacks;
	mac->m_min_sdu = MLXCX_MTU_OFFSET;
	mac->m_max_sdu = port->mlp_mtu - MLXCX_MTU_OFFSET;
	mac->m_margin = VLAN_TAGSZ;
	mac->m_priv_props = mlxcx_priv_props;
	mac->m_v12n = MAC_VIRT_LEVEL1;

	ret = mac_register(mac, &mlxp->mlx_mac_hdl);
	if (ret != 0) {
		mlxcx_warn(mlxp, "mac_register() returned %d", ret);
	}
	mac_free(mac);

	mutex_exit(&port->mlp_mtx);

	mlxcx_update_link_state(mlxp, port);

	return (ret == 0);
}