1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright (c) 2020, the University of Queensland 14 * Copyright 2020 RackTop Systems, Inc. 15 */ 16 17 /* 18 * Mellanox Connect-X 4/5/6 driver. 19 */ 20 21 #include <sys/modctl.h> 22 #include <sys/conf.h> 23 #include <sys/devops.h> 24 #include <sys/sysmacros.h> 25 #include <sys/vlan.h> 26 27 #include <sys/pattr.h> 28 #include <sys/dlpi.h> 29 30 #include <sys/mac_provider.h> 31 32 /* Need these for mac_vlan_header_info() */ 33 #include <sys/mac_client.h> 34 #include <sys/mac_client_priv.h> 35 36 #include <mlxcx.h> 37 38 static char *mlxcx_priv_props[] = { 39 NULL 40 }; 41 42 #define MBITS 1000000ULL 43 #define GBITS (1000ULL * MBITS) 44 45 static uint64_t 46 mlxcx_speed_to_bits(mlxcx_eth_proto_t v) 47 { 48 switch (v) { 49 case MLXCX_PROTO_SGMII_100BASE: 50 return (100ULL * MBITS); 51 case MLXCX_PROTO_SGMII: 52 case MLXCX_PROTO_1000BASE_KX: 53 return (1000ULL * MBITS); 54 case MLXCX_PROTO_10GBASE_CX4: 55 case MLXCX_PROTO_10GBASE_KX4: 56 case MLXCX_PROTO_10GBASE_KR: 57 case MLXCX_PROTO_10GBASE_CR: 58 case MLXCX_PROTO_10GBASE_SR: 59 case MLXCX_PROTO_10GBASE_ER_LR: 60 return (10ULL * GBITS); 61 case MLXCX_PROTO_40GBASE_CR4: 62 case MLXCX_PROTO_40GBASE_KR4: 63 case MLXCX_PROTO_40GBASE_SR4: 64 case MLXCX_PROTO_40GBASE_LR4_ER4: 65 return (40ULL * GBITS); 66 case MLXCX_PROTO_25GBASE_CR: 67 case MLXCX_PROTO_25GBASE_KR: 68 case MLXCX_PROTO_25GBASE_SR: 69 return (25ULL * GBITS); 70 case MLXCX_PROTO_50GBASE_SR2: 71 case MLXCX_PROTO_50GBASE_CR2: 72 case MLXCX_PROTO_50GBASE_KR2: 73 return (50ULL * GBITS); 74 case MLXCX_PROTO_100GBASE_CR4: 75 case MLXCX_PROTO_100GBASE_SR4: 76 case MLXCX_PROTO_100GBASE_KR4: 77 return (100ULL * GBITS); 78 default: 79 return (0); 80 } 81 } 82 83 static link_fec_t 84 mlxcx_fec_to_link_fec(mlxcx_pplm_fec_active_t mlxcx_fec) 85 { 86 if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_NONE) != 0) 87 return (LINK_FEC_NONE); 88 89 if ((mlxcx_fec & MLXCX_PPLM_FEC_ACTIVE_FIRECODE) != 0) 90 return (LINK_FEC_BASE_R); 91 92 if ((mlxcx_fec & (MLXCX_PPLM_FEC_ACTIVE_RS528 | 93 MLXCX_PPLM_FEC_ACTIVE_RS271 | MLXCX_PPLM_FEC_ACTIVE_RS544 | 94 MLXCX_PPLM_FEC_ACTIVE_RS272)) != 0) 95 return (LINK_FEC_RS); 96 97 return (LINK_FEC_NONE); 98 } 99 100 static boolean_t 101 mlxcx_link_fec_cap(link_fec_t fec, mlxcx_pplm_fec_caps_t *pfecp) 102 { 103 mlxcx_pplm_fec_caps_t pplm_fec = 0; 104 105 if ((fec & LINK_FEC_AUTO) != 0) { 106 pplm_fec = MLXCX_PPLM_FEC_CAP_AUTO; 107 fec &= ~LINK_FEC_AUTO; 108 } else if ((fec & LINK_FEC_NONE) != 0) { 109 pplm_fec = MLXCX_PPLM_FEC_CAP_NONE; 110 fec &= ~LINK_FEC_NONE; 111 } else if ((fec & LINK_FEC_RS) != 0) { 112 pplm_fec |= MLXCX_PPLM_FEC_CAP_RS; 113 fec &= ~LINK_FEC_RS; 114 } else if ((fec & LINK_FEC_BASE_R) != 0) { 115 pplm_fec |= MLXCX_PPLM_FEC_CAP_FIRECODE; 116 fec &= ~LINK_FEC_BASE_R; 117 } 118 119 /* 120 * Only one fec option is allowed. 121 */ 122 if (fec != 0) 123 return (B_FALSE); 124 125 *pfecp = pplm_fec; 126 127 return (B_TRUE); 128 } 129 130 static int 131 mlxcx_mac_stat_rfc_2863(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat, 132 uint64_t *val) 133 { 134 int ret = 0; 135 boolean_t ok; 136 mlxcx_register_data_t data; 137 mlxcx_ppcnt_rfc_2863_t *st; 138 139 ASSERT(mutex_owned(&port->mlp_mtx)); 140 141 bzero(&data, sizeof (data)); 142 data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1; 143 data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_RFC_2863; 144 data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR; 145 146 ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, 147 MLXCX_REG_PPCNT, &data); 148 if (!ok) 149 return (EIO); 150 st = &data.mlrd_ppcnt.mlrd_ppcnt_rfc_2863; 151 152 switch (stat) { 153 case MAC_STAT_RBYTES: 154 *val = from_be64(st->mlppc_rfc_2863_in_octets); 155 break; 156 case MAC_STAT_MULTIRCV: 157 *val = from_be64(st->mlppc_rfc_2863_in_mcast_pkts); 158 break; 159 case MAC_STAT_BRDCSTRCV: 160 *val = from_be64(st->mlppc_rfc_2863_in_bcast_pkts); 161 break; 162 case MAC_STAT_MULTIXMT: 163 *val = from_be64(st->mlppc_rfc_2863_out_mcast_pkts); 164 break; 165 case MAC_STAT_BRDCSTXMT: 166 *val = from_be64(st->mlppc_rfc_2863_out_bcast_pkts); 167 break; 168 case MAC_STAT_IERRORS: 169 *val = from_be64(st->mlppc_rfc_2863_in_errors); 170 break; 171 case MAC_STAT_UNKNOWNS: 172 *val = from_be64(st->mlppc_rfc_2863_in_unknown_protos); 173 break; 174 case MAC_STAT_OERRORS: 175 *val = from_be64(st->mlppc_rfc_2863_out_errors); 176 break; 177 case MAC_STAT_OBYTES: 178 *val = from_be64(st->mlppc_rfc_2863_out_octets); 179 break; 180 default: 181 ret = ENOTSUP; 182 } 183 184 return (ret); 185 } 186 187 static int 188 mlxcx_mac_stat_ieee_802_3(mlxcx_t *mlxp, mlxcx_port_t *port, uint_t stat, 189 uint64_t *val) 190 { 191 int ret = 0; 192 boolean_t ok; 193 mlxcx_register_data_t data; 194 mlxcx_ppcnt_ieee_802_3_t *st; 195 196 ASSERT(mutex_owned(&port->mlp_mtx)); 197 198 bzero(&data, sizeof (data)); 199 data.mlrd_ppcnt.mlrd_ppcnt_local_port = port->mlp_num + 1; 200 data.mlrd_ppcnt.mlrd_ppcnt_grp = MLXCX_PPCNT_GRP_IEEE_802_3; 201 data.mlrd_ppcnt.mlrd_ppcnt_clear = MLXCX_PPCNT_NO_CLEAR; 202 203 ok = mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ, 204 MLXCX_REG_PPCNT, &data); 205 if (!ok) 206 return (EIO); 207 st = &data.mlrd_ppcnt.mlrd_ppcnt_ieee_802_3; 208 209 switch (stat) { 210 case MAC_STAT_IPACKETS: 211 *val = from_be64(st->mlppc_ieee_802_3_frames_rx); 212 break; 213 case MAC_STAT_OPACKETS: 214 *val = from_be64(st->mlppc_ieee_802_3_frames_tx); 215 break; 216 case ETHER_STAT_ALIGN_ERRORS: 217 *val = from_be64(st->mlppc_ieee_802_3_align_err); 218 break; 219 case ETHER_STAT_FCS_ERRORS: 220 *val = from_be64(st->mlppc_ieee_802_3_fcs_err); 221 break; 222 case ETHER_STAT_TOOLONG_ERRORS: 223 *val = from_be64(st->mlppc_ieee_802_3_frame_too_long_err); 224 break; 225 default: 226 ret = ENOTSUP; 227 } 228 229 return (ret); 230 } 231 232 static int 233 mlxcx_mac_stat(void *arg, uint_t stat, uint64_t *val) 234 { 235 mlxcx_t *mlxp = (mlxcx_t *)arg; 236 mlxcx_port_t *port = &mlxp->mlx_ports[0]; 237 int ret = 0; 238 239 mutex_enter(&port->mlp_mtx); 240 241 switch (stat) { 242 case MAC_STAT_IFSPEED: 243 *val = mlxcx_speed_to_bits(port->mlp_oper_proto); 244 break; 245 case ETHER_STAT_LINK_DUPLEX: 246 *val = LINK_DUPLEX_FULL; 247 break; 248 case MAC_STAT_RBYTES: 249 case MAC_STAT_MULTIRCV: 250 case MAC_STAT_BRDCSTRCV: 251 case MAC_STAT_MULTIXMT: 252 case MAC_STAT_BRDCSTXMT: 253 case MAC_STAT_IERRORS: 254 case MAC_STAT_UNKNOWNS: 255 case MAC_STAT_OERRORS: 256 case MAC_STAT_OBYTES: 257 ret = mlxcx_mac_stat_rfc_2863(mlxp, port, stat, val); 258 break; 259 case MAC_STAT_IPACKETS: 260 case MAC_STAT_OPACKETS: 261 case ETHER_STAT_ALIGN_ERRORS: 262 case ETHER_STAT_FCS_ERRORS: 263 case ETHER_STAT_TOOLONG_ERRORS: 264 ret = mlxcx_mac_stat_ieee_802_3(mlxp, port, stat, val); 265 break; 266 case MAC_STAT_NORCVBUF: 267 *val = port->mlp_stats.mlps_rx_drops; 268 break; 269 default: 270 ret = ENOTSUP; 271 } 272 273 mutex_exit(&port->mlp_mtx); 274 275 return (ret); 276 } 277 278 static int 279 mlxcx_mac_led_set(void *arg, mac_led_mode_t mode, uint_t flags) 280 { 281 mlxcx_t *mlxp = arg; 282 mlxcx_port_t *port = &mlxp->mlx_ports[0]; 283 int ret = 0; 284 285 if (flags != 0) { 286 return (EINVAL); 287 } 288 289 mutex_enter(&port->mlp_mtx); 290 291 switch (mode) { 292 case MAC_LED_DEFAULT: 293 case MAC_LED_OFF: 294 if (!mlxcx_cmd_set_port_led(mlxp, port, 0)) { 295 ret = EIO; 296 break; 297 } 298 break; 299 case MAC_LED_IDENT: 300 if (!mlxcx_cmd_set_port_led(mlxp, port, UINT16_MAX)) { 301 ret = EIO; 302 break; 303 } 304 break; 305 default: 306 ret = ENOTSUP; 307 } 308 309 mutex_exit(&port->mlp_mtx); 310 311 return (ret); 312 } 313 314 static int 315 mlxcx_mac_txr_info(void *arg, uint_t id, mac_transceiver_info_t *infop) 316 { 317 mlxcx_t *mlxp = arg; 318 mlxcx_module_status_t st; 319 320 if (!mlxcx_cmd_query_module_status(mlxp, id, &st, NULL)) 321 return (EIO); 322 323 if (st != MLXCX_MODULE_UNPLUGGED) 324 mac_transceiver_info_set_present(infop, B_TRUE); 325 326 if (st == MLXCX_MODULE_PLUGGED) 327 mac_transceiver_info_set_usable(infop, B_TRUE); 328 329 return (0); 330 } 331 332 static int 333 mlxcx_mac_txr_read(void *arg, uint_t id, uint_t page, void *vbuf, 334 size_t nbytes, off_t offset, size_t *nread) 335 { 336 mlxcx_t *mlxp = arg; 337 mlxcx_register_data_t data; 338 uint8_t *buf = vbuf; 339 boolean_t ok; 340 size_t take, done = 0; 341 uint8_t i2c_addr; 342 343 if (id != 0 || vbuf == NULL || nbytes == 0 || nread == NULL) 344 return (EINVAL); 345 346 if (nbytes > 256 || offset >= 256 || (offset + nbytes > 256)) 347 return (EINVAL); 348 349 /* 350 * The PRM is really not very clear about any of this, but it seems 351 * that the i2c_device_addr field in MCIA is the SFP+ spec "page" 352 * number shifted right by 1 bit. They're written in the SFF spec 353 * like "1010000X" so Mellanox just dropped the X. 354 * 355 * This means that if we want page 0xA0, we put 0x50 in the 356 * i2c_device_addr field. 357 * 358 * The "page_number" field in MCIA means something else. Don't ask me 359 * what. FreeBSD leaves it as zero, so we will too! 360 */ 361 i2c_addr = page >> 1; 362 363 while (done < nbytes) { 364 take = nbytes - done; 365 if (take > sizeof (data.mlrd_mcia.mlrd_mcia_data)) 366 take = sizeof (data.mlrd_mcia.mlrd_mcia_data); 367 368 bzero(&data, sizeof (data)); 369 ASSERT3U(id, <=, 0xff); 370 data.mlrd_mcia.mlrd_mcia_module = (uint8_t)id; 371 data.mlrd_mcia.mlrd_mcia_i2c_device_addr = i2c_addr; 372 data.mlrd_mcia.mlrd_mcia_device_addr = to_be16(offset); 373 data.mlrd_mcia.mlrd_mcia_size = to_be16(take); 374 375 ok = mlxcx_cmd_access_register(mlxp, 376 MLXCX_CMD_ACCESS_REGISTER_READ, MLXCX_REG_MCIA, &data); 377 if (!ok) { 378 *nread = 0; 379 return (EIO); 380 } 381 382 if (data.mlrd_mcia.mlrd_mcia_status != MLXCX_MCIA_STATUS_OK) { 383 *nread = 0; 384 return (EIO); 385 } 386 387 bcopy(data.mlrd_mcia.mlrd_mcia_data, &buf[done], take); 388 389 done += take; 390 offset += take; 391 } 392 *nread = done; 393 return (0); 394 } 395 396 static int 397 mlxcx_mac_ring_stat(mac_ring_driver_t rh, uint_t stat, uint64_t *val) 398 { 399 mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh; 400 (void) wq; 401 402 /* 403 * We should add support for using hw flow counters and such to 404 * get per-ring statistics. Not done yet though! 405 */ 406 407 switch (stat) { 408 default: 409 *val = 0; 410 return (ENOTSUP); 411 } 412 413 return (0); 414 } 415 416 static int 417 mlxcx_mac_start(void *arg) 418 { 419 mlxcx_t *mlxp = (mlxcx_t *)arg; 420 (void) mlxp; 421 return (0); 422 } 423 424 static void 425 mlxcx_mac_stop(void *arg) 426 { 427 mlxcx_t *mlxp = (mlxcx_t *)arg; 428 (void) mlxp; 429 } 430 431 static mblk_t * 432 mlxcx_mac_ring_tx(void *arg, mblk_t *mp) 433 { 434 mlxcx_work_queue_t *sq = (mlxcx_work_queue_t *)arg; 435 mlxcx_t *mlxp = sq->mlwq_mlx; 436 mlxcx_completion_queue_t *cq; 437 mlxcx_buffer_t *b; 438 mac_header_info_t mhi; 439 mblk_t *kmp, *nmp; 440 uint8_t inline_hdrs[MLXCX_MAX_INLINE_HEADERLEN]; 441 size_t inline_hdrlen, rem, off; 442 uint32_t chkflags = 0; 443 boolean_t ok; 444 size_t take = 0; 445 uint_t bcount; 446 447 VERIFY(mp->b_next == NULL); 448 449 mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &chkflags); 450 451 if (mac_vlan_header_info(mlxp->mlx_mac_hdl, mp, &mhi) != 0) { 452 /* 453 * We got given a frame without a valid L2 header on it. We 454 * can't really transmit that (mlx parts don't like it), so 455 * we will just drop it on the floor. 456 */ 457 freemsg(mp); 458 return (NULL); 459 } 460 461 inline_hdrlen = rem = mhi.mhi_hdrsize; 462 463 kmp = mp; 464 off = 0; 465 while (rem > 0) { 466 const ptrdiff_t sz = MBLKL(kmp); 467 ASSERT3S(sz, >=, 0); 468 ASSERT3U(sz, <=, SIZE_MAX); 469 take = sz; 470 if (take > rem) 471 take = rem; 472 bcopy(kmp->b_rptr, inline_hdrs + off, take); 473 rem -= take; 474 off += take; 475 if (take == sz) { 476 take = 0; 477 kmp = kmp->b_cont; 478 } 479 } 480 481 bcount = mlxcx_buf_bind_or_copy(mlxp, sq, kmp, take, &b); 482 if (bcount == 0) { 483 atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC); 484 return (mp); 485 } 486 487 mutex_enter(&sq->mlwq_mtx); 488 VERIFY3U(sq->mlwq_inline_mode, <=, MLXCX_ETH_INLINE_L2); 489 cq = sq->mlwq_cq; 490 491 /* 492 * state is a single int, so read-only access without the CQ lock 493 * should be fine. 494 */ 495 if (cq->mlcq_state & MLXCX_CQ_TEARDOWN) { 496 mutex_exit(&sq->mlwq_mtx); 497 mlxcx_buf_return_chain(mlxp, b, B_FALSE); 498 return (NULL); 499 } 500 501 if ((sq->mlwq_state & (MLXCX_WQ_TEARDOWN | MLXCX_WQ_STARTED)) != 502 MLXCX_WQ_STARTED) { 503 mutex_exit(&sq->mlwq_mtx); 504 mlxcx_buf_return_chain(mlxp, b, B_FALSE); 505 return (NULL); 506 } 507 508 /* 509 * If the completion queue buffer count is already at or above 510 * the high water mark, or the addition of this new chain will 511 * exceed the CQ ring size, then indicate we are blocked. 512 */ 513 if (cq->mlcq_bufcnt >= cq->mlcq_bufhwm || 514 (cq->mlcq_bufcnt + bcount) > cq->mlcq_nents) { 515 atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC); 516 goto blocked; 517 } 518 519 if (sq->mlwq_wqebb_used >= sq->mlwq_bufhwm) { 520 atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC); 521 goto blocked; 522 } 523 524 ok = mlxcx_sq_add_buffer(mlxp, sq, inline_hdrs, inline_hdrlen, 525 chkflags, b); 526 if (!ok) { 527 atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_BLOCKED_MAC); 528 atomic_or_uint(&sq->mlwq_state, MLXCX_WQ_BLOCKED_MAC); 529 goto blocked; 530 } 531 532 /* 533 * Now that we've successfully enqueued the rest of the packet, 534 * free any mblks that we cut off while inlining headers. 535 */ 536 for (; mp != kmp; mp = nmp) { 537 nmp = mp->b_cont; 538 freeb(mp); 539 } 540 541 mutex_exit(&sq->mlwq_mtx); 542 543 return (NULL); 544 545 blocked: 546 mutex_exit(&sq->mlwq_mtx); 547 mlxcx_buf_return_chain(mlxp, b, B_TRUE); 548 return (mp); 549 } 550 551 static int 552 mlxcx_mac_setpromisc(void *arg, boolean_t on) 553 { 554 mlxcx_t *mlxp = (mlxcx_t *)arg; 555 mlxcx_port_t *port = &mlxp->mlx_ports[0]; 556 mlxcx_flow_group_t *fg; 557 mlxcx_flow_entry_t *fe; 558 mlxcx_flow_table_t *ft; 559 mlxcx_ring_group_t *g; 560 int ret = 0; 561 uint_t idx; 562 563 mutex_enter(&port->mlp_mtx); 564 565 /* 566 * First, do the top-level flow entry on the root flow table for 567 * the port. This catches all traffic that doesn't match any MAC 568 * MAC filters. 569 */ 570 ft = port->mlp_rx_flow; 571 mutex_enter(&ft->mlft_mtx); 572 fg = port->mlp_promisc; 573 fe = list_head(&fg->mlfg_entries); 574 if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) { 575 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 576 ret = EIO; 577 } 578 } else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) { 579 if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) { 580 ret = EIO; 581 } 582 } 583 mutex_exit(&ft->mlft_mtx); 584 585 /* 586 * If we failed to change the top-level entry, don't bother with 587 * trying the per-group ones. 588 */ 589 if (ret != 0) { 590 mutex_exit(&port->mlp_mtx); 591 return (ret); 592 } 593 594 /* 595 * Then, do the per-rx-group flow entries which catch traffic that 596 * matched a MAC filter but failed to match a VLAN filter. 597 */ 598 for (idx = 0; idx < mlxp->mlx_rx_ngroups; ++idx) { 599 g = &mlxp->mlx_rx_groups[idx]; 600 601 mutex_enter(&g->mlg_mtx); 602 603 ft = g->mlg_rx_vlan_ft; 604 mutex_enter(&ft->mlft_mtx); 605 606 fg = g->mlg_rx_vlan_promisc_fg; 607 fe = list_head(&fg->mlfg_entries); 608 if (on && !(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) { 609 if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) { 610 ret = EIO; 611 } 612 } else if (!on && (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED)) { 613 if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) { 614 ret = EIO; 615 } 616 } 617 618 mutex_exit(&ft->mlft_mtx); 619 mutex_exit(&g->mlg_mtx); 620 } 621 622 mutex_exit(&port->mlp_mtx); 623 return (ret); 624 } 625 626 static int 627 mlxcx_mac_multicast(void *arg, boolean_t add, const uint8_t *addr) 628 { 629 mlxcx_t *mlxp = (mlxcx_t *)arg; 630 mlxcx_port_t *port = &mlxp->mlx_ports[0]; 631 mlxcx_ring_group_t *g = &mlxp->mlx_rx_groups[0]; 632 int ret = 0; 633 634 mutex_enter(&port->mlp_mtx); 635 mutex_enter(&g->mlg_mtx); 636 if (add) { 637 if (!mlxcx_add_umcast_entry(mlxp, port, g, addr)) { 638 ret = EIO; 639 } 640 } else { 641 if (!mlxcx_remove_umcast_entry(mlxp, port, g, addr)) { 642 ret = EIO; 643 } 644 } 645 mutex_exit(&g->mlg_mtx); 646 mutex_exit(&port->mlp_mtx); 647 return (ret); 648 } 649 650 static int 651 mlxcx_group_add_mac(void *arg, const uint8_t *mac_addr) 652 { 653 mlxcx_ring_group_t *g = arg; 654 mlxcx_t *mlxp = g->mlg_mlx; 655 mlxcx_port_t *port = g->mlg_port; 656 int ret = 0; 657 658 mutex_enter(&port->mlp_mtx); 659 mutex_enter(&g->mlg_mtx); 660 if (!mlxcx_add_umcast_entry(mlxp, port, g, mac_addr)) { 661 ret = EIO; 662 } 663 mutex_exit(&g->mlg_mtx); 664 mutex_exit(&port->mlp_mtx); 665 666 return (ret); 667 } 668 669 /* 670 * Support for VLAN steering into groups is not yet available in upstream 671 * illumos. 672 */ 673 #if defined(MAC_VLAN_UNTAGGED) 674 675 static int 676 mlxcx_group_add_vlan(mac_group_driver_t gh, uint16_t vid) 677 { 678 mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh; 679 mlxcx_t *mlxp = g->mlg_mlx; 680 int ret = 0; 681 boolean_t tagged = B_TRUE; 682 683 if (vid == MAC_VLAN_UNTAGGED) { 684 vid = 0; 685 tagged = B_FALSE; 686 } 687 688 mutex_enter(&g->mlg_mtx); 689 if (!mlxcx_add_vlan_entry(mlxp, g, tagged, vid)) { 690 ret = EIO; 691 } 692 mutex_exit(&g->mlg_mtx); 693 694 return (ret); 695 } 696 697 static int 698 mlxcx_group_remove_vlan(mac_group_driver_t gh, uint16_t vid) 699 { 700 mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh; 701 mlxcx_t *mlxp = g->mlg_mlx; 702 int ret = 0; 703 boolean_t tagged = B_TRUE; 704 705 if (vid == MAC_VLAN_UNTAGGED) { 706 vid = 0; 707 tagged = B_FALSE; 708 } 709 710 mutex_enter(&g->mlg_mtx); 711 if (!mlxcx_remove_vlan_entry(mlxp, g, tagged, vid)) { 712 ret = EIO; 713 } 714 mutex_exit(&g->mlg_mtx); 715 716 return (ret); 717 } 718 719 #endif /* MAC_VLAN_UNTAGGED */ 720 721 static int 722 mlxcx_group_remove_mac(void *arg, const uint8_t *mac_addr) 723 { 724 mlxcx_ring_group_t *g = arg; 725 mlxcx_t *mlxp = g->mlg_mlx; 726 mlxcx_port_t *port = g->mlg_port; 727 int ret = 0; 728 729 mutex_enter(&port->mlp_mtx); 730 mutex_enter(&g->mlg_mtx); 731 if (!mlxcx_remove_umcast_entry(mlxp, port, g, mac_addr)) { 732 ret = EIO; 733 } 734 mutex_exit(&g->mlg_mtx); 735 mutex_exit(&port->mlp_mtx); 736 737 return (ret); 738 } 739 740 static int 741 mlxcx_mac_ring_start(mac_ring_driver_t rh, uint64_t gen_num) 742 { 743 mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh; 744 mlxcx_completion_queue_t *cq = wq->mlwq_cq; 745 mlxcx_ring_group_t *g = wq->mlwq_group; 746 mlxcx_t *mlxp = wq->mlwq_mlx; 747 748 ASSERT(cq != NULL); 749 ASSERT(g != NULL); 750 751 ASSERT(wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ || 752 wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ); 753 if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ && 754 !mlxcx_tx_ring_start(mlxp, g, wq)) 755 return (EIO); 756 if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ && 757 !mlxcx_rx_ring_start(mlxp, g, wq)) 758 return (EIO); 759 760 mutex_enter(&cq->mlcq_mtx); 761 cq->mlcq_mac_gen = gen_num; 762 mutex_exit(&cq->mlcq_mtx); 763 764 return (0); 765 } 766 767 static void 768 mlxcx_mac_ring_stop(mac_ring_driver_t rh) 769 { 770 mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)rh; 771 mlxcx_completion_queue_t *cq = wq->mlwq_cq; 772 mlxcx_t *mlxp = wq->mlwq_mlx; 773 mlxcx_buf_shard_t *s; 774 mlxcx_buffer_t *buf; 775 776 /* 777 * To prevent deadlocks and sleeping whilst holding either the 778 * CQ mutex or WQ mutex, we split the stop processing into two 779 * parts. 780 * 781 * With the CQ amd WQ mutexes held the appropriate WQ is stopped. 782 * The Q in the HCA is set to Reset state and flagged as no 783 * longer started. Atomic with changing this WQ state, the buffer 784 * shards are flagged as draining. 785 * 786 * Now, any requests for buffers and attempts to submit messages 787 * will fail and once we're in this state it is safe to relinquish 788 * the CQ and WQ mutexes. Allowing us to complete the ring stop 789 * by waiting for the buffer lists, with the exception of 790 * the loaned list, to drain. Buffers on the loaned list are 791 * not under our control, we will get them back when the mblk tied 792 * to the buffer is freed. 793 */ 794 795 mutex_enter(&cq->mlcq_mtx); 796 mutex_enter(&wq->mlwq_mtx); 797 798 if (wq->mlwq_state & MLXCX_WQ_STARTED) { 799 if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ && 800 !mlxcx_cmd_stop_rq(mlxp, wq)) { 801 mutex_exit(&wq->mlwq_mtx); 802 mutex_exit(&cq->mlcq_mtx); 803 return; 804 } 805 if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ && 806 !mlxcx_cmd_stop_sq(mlxp, wq)) { 807 mutex_exit(&wq->mlwq_mtx); 808 mutex_exit(&cq->mlcq_mtx); 809 return; 810 } 811 } 812 ASSERT0(wq->mlwq_state & MLXCX_WQ_STARTED); 813 814 mlxcx_shard_draining(wq->mlwq_bufs); 815 if (wq->mlwq_foreign_bufs != NULL) 816 mlxcx_shard_draining(wq->mlwq_foreign_bufs); 817 818 819 if (wq->mlwq_state & MLXCX_WQ_BUFFERS) { 820 mutex_exit(&wq->mlwq_mtx); 821 mutex_exit(&cq->mlcq_mtx); 822 823 /* Return any outstanding buffers to the free pool. */ 824 while ((buf = list_remove_head(&cq->mlcq_buffers)) != NULL) { 825 mlxcx_buf_return_chain(mlxp, buf, B_FALSE); 826 } 827 mutex_enter(&cq->mlcq_bufbmtx); 828 while ((buf = list_remove_head(&cq->mlcq_buffers_b)) != NULL) { 829 mlxcx_buf_return_chain(mlxp, buf, B_FALSE); 830 } 831 mutex_exit(&cq->mlcq_bufbmtx); 832 cq->mlcq_bufcnt = 0; 833 834 s = wq->mlwq_bufs; 835 mutex_enter(&s->mlbs_mtx); 836 while (!list_is_empty(&s->mlbs_busy)) 837 cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); 838 while ((buf = list_head(&s->mlbs_free)) != NULL) { 839 mlxcx_buf_destroy(mlxp, buf); 840 } 841 mutex_exit(&s->mlbs_mtx); 842 843 s = wq->mlwq_foreign_bufs; 844 if (s != NULL) { 845 mutex_enter(&s->mlbs_mtx); 846 while (!list_is_empty(&s->mlbs_busy)) 847 cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx); 848 while ((buf = list_head(&s->mlbs_free)) != NULL) { 849 mlxcx_buf_destroy(mlxp, buf); 850 } 851 mutex_exit(&s->mlbs_mtx); 852 } 853 854 mutex_enter(&wq->mlwq_mtx); 855 wq->mlwq_state &= ~MLXCX_WQ_BUFFERS; 856 mutex_exit(&wq->mlwq_mtx); 857 } else { 858 mutex_exit(&wq->mlwq_mtx); 859 mutex_exit(&cq->mlcq_mtx); 860 } 861 } 862 863 static int 864 mlxcx_mac_group_start(mac_group_driver_t gh) 865 { 866 mlxcx_ring_group_t *g = (mlxcx_ring_group_t *)gh; 867 mlxcx_t *mlxp = g->mlg_mlx; 868 869 VERIFY3S(g->mlg_type, ==, MLXCX_GROUP_RX); 870 ASSERT(mlxp != NULL); 871 872 if (g->mlg_state & MLXCX_GROUP_RUNNING) 873 return (0); 874 875 if (!mlxcx_rx_group_start(mlxp, g)) 876 return (EIO); 877 878 return (0); 879 } 880 881 static void 882 mlxcx_mac_fill_tx_ring(void *arg, mac_ring_type_t rtype, const int group_index, 883 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 884 { 885 mlxcx_t *mlxp = (mlxcx_t *)arg; 886 mlxcx_ring_group_t *g; 887 mlxcx_work_queue_t *wq; 888 mac_intr_t *mintr = &infop->mri_intr; 889 890 if (rtype != MAC_RING_TYPE_TX) 891 return; 892 ASSERT3S(group_index, ==, -1); 893 894 g = &mlxp->mlx_tx_groups[0]; 895 ASSERT(g->mlg_state & MLXCX_GROUP_INIT); 896 mutex_enter(&g->mlg_mtx); 897 898 ASSERT3S(ring_index, >=, 0); 899 ASSERT3S(ring_index, <, g->mlg_nwqs); 900 901 wq = &g->mlg_wqs[ring_index]; 902 903 wq->mlwq_cq->mlcq_mac_hdl = rh; 904 905 infop->mri_driver = (mac_ring_driver_t)wq; 906 infop->mri_start = mlxcx_mac_ring_start; 907 infop->mri_stop = mlxcx_mac_ring_stop; 908 infop->mri_tx = mlxcx_mac_ring_tx; 909 infop->mri_stat = mlxcx_mac_ring_stat; 910 911 mintr->mi_ddi_handle = mlxp->mlx_intr_handles[ 912 wq->mlwq_cq->mlcq_eq->mleq_intr_index]; 913 914 mutex_exit(&g->mlg_mtx); 915 } 916 917 static int 918 mlxcx_mac_ring_intr_enable(mac_intr_handle_t intrh) 919 { 920 mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh; 921 mlxcx_event_queue_t *eq = cq->mlcq_eq; 922 mlxcx_t *mlxp = cq->mlcq_mlx; 923 924 /* 925 * We are going to call mlxcx_arm_cq() here, so we take the EQ lock 926 * as well as the CQ one to make sure we don't race against 927 * mlxcx_intr_n(). 928 */ 929 mutex_enter(&eq->mleq_mtx); 930 mutex_enter(&cq->mlcq_mtx); 931 if (cq->mlcq_state & MLXCX_CQ_POLLING) { 932 cq->mlcq_state &= ~MLXCX_CQ_POLLING; 933 if (!(cq->mlcq_state & MLXCX_CQ_ARMED)) 934 mlxcx_arm_cq(mlxp, cq); 935 } 936 mutex_exit(&cq->mlcq_mtx); 937 mutex_exit(&eq->mleq_mtx); 938 939 return (0); 940 } 941 942 static int 943 mlxcx_mac_ring_intr_disable(mac_intr_handle_t intrh) 944 { 945 mlxcx_completion_queue_t *cq = (mlxcx_completion_queue_t *)intrh; 946 947 mutex_enter(&cq->mlcq_mtx); 948 atomic_or_uint(&cq->mlcq_state, MLXCX_CQ_POLLING); 949 mutex_exit(&cq->mlcq_mtx); 950 951 return (0); 952 } 953 954 static mblk_t * 955 mlxcx_mac_ring_rx_poll(void *arg, int poll_bytes) 956 { 957 mlxcx_work_queue_t *wq = (mlxcx_work_queue_t *)arg; 958 mlxcx_completion_queue_t *cq = wq->mlwq_cq; 959 mlxcx_t *mlxp = wq->mlwq_mlx; 960 mblk_t *mp; 961 962 ASSERT(cq != NULL); 963 ASSERT3S(poll_bytes, >, 0); 964 if (poll_bytes == 0) 965 return (NULL); 966 967 mutex_enter(&cq->mlcq_mtx); 968 mp = mlxcx_rx_poll(mlxp, cq, poll_bytes); 969 mutex_exit(&cq->mlcq_mtx); 970 971 return (mp); 972 } 973 974 static void 975 mlxcx_mac_fill_rx_ring(void *arg, mac_ring_type_t rtype, const int group_index, 976 const int ring_index, mac_ring_info_t *infop, mac_ring_handle_t rh) 977 { 978 mlxcx_t *mlxp = (mlxcx_t *)arg; 979 mlxcx_ring_group_t *g; 980 mlxcx_work_queue_t *wq; 981 mac_intr_t *mintr = &infop->mri_intr; 982 983 if (rtype != MAC_RING_TYPE_RX) 984 return; 985 ASSERT3S(group_index, >=, 0); 986 ASSERT3S(group_index, <, mlxp->mlx_rx_ngroups); 987 988 g = &mlxp->mlx_rx_groups[group_index]; 989 ASSERT(g->mlg_state & MLXCX_GROUP_INIT); 990 mutex_enter(&g->mlg_mtx); 991 992 ASSERT3S(ring_index, >=, 0); 993 ASSERT3S(ring_index, <, g->mlg_nwqs); 994 995 ASSERT(g->mlg_state & MLXCX_GROUP_WQS); 996 wq = &g->mlg_wqs[ring_index]; 997 998 wq->mlwq_cq->mlcq_mac_hdl = rh; 999 1000 infop->mri_driver = (mac_ring_driver_t)wq; 1001 infop->mri_start = mlxcx_mac_ring_start; 1002 infop->mri_stop = mlxcx_mac_ring_stop; 1003 infop->mri_poll = mlxcx_mac_ring_rx_poll; 1004 infop->mri_stat = mlxcx_mac_ring_stat; 1005 1006 mintr->mi_handle = (mac_intr_handle_t)wq->mlwq_cq; 1007 mintr->mi_enable = mlxcx_mac_ring_intr_enable; 1008 mintr->mi_disable = mlxcx_mac_ring_intr_disable; 1009 1010 mintr->mi_ddi_handle = mlxp->mlx_intr_handles[ 1011 wq->mlwq_cq->mlcq_eq->mleq_intr_index]; 1012 1013 mutex_exit(&g->mlg_mtx); 1014 } 1015 1016 static void 1017 mlxcx_mac_fill_rx_group(void *arg, mac_ring_type_t rtype, const int index, 1018 mac_group_info_t *infop, mac_group_handle_t gh) 1019 { 1020 mlxcx_t *mlxp = (mlxcx_t *)arg; 1021 mlxcx_ring_group_t *g; 1022 1023 if (rtype != MAC_RING_TYPE_RX) 1024 return; 1025 1026 ASSERT3S(index, >=, 0); 1027 ASSERT3S(index, <, mlxp->mlx_rx_ngroups); 1028 g = &mlxp->mlx_rx_groups[index]; 1029 ASSERT(g->mlg_state & MLXCX_GROUP_INIT); 1030 1031 g->mlg_mac_hdl = gh; 1032 1033 infop->mgi_driver = (mac_group_driver_t)g; 1034 infop->mgi_start = mlxcx_mac_group_start; 1035 infop->mgi_stop = NULL; 1036 infop->mgi_addmac = mlxcx_group_add_mac; 1037 infop->mgi_remmac = mlxcx_group_remove_mac; 1038 #if defined(MAC_VLAN_UNTAGGED) 1039 infop->mgi_addvlan = mlxcx_group_add_vlan; 1040 infop->mgi_remvlan = mlxcx_group_remove_vlan; 1041 #endif /* MAC_VLAN_UNTAGGED */ 1042 1043 infop->mgi_count = g->mlg_nwqs; 1044 } 1045 1046 static boolean_t 1047 mlxcx_mac_getcapab(void *arg, mac_capab_t cap, void *cap_data) 1048 { 1049 mlxcx_t *mlxp = (mlxcx_t *)arg; 1050 mac_capab_rings_t *cap_rings; 1051 mac_capab_led_t *cap_leds; 1052 mac_capab_transceiver_t *cap_txr; 1053 uint_t i, n = 0; 1054 1055 switch (cap) { 1056 1057 case MAC_CAPAB_RINGS: 1058 cap_rings = cap_data; 1059 cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 1060 switch (cap_rings->mr_type) { 1061 case MAC_RING_TYPE_TX: 1062 cap_rings->mr_gnum = 0; 1063 cap_rings->mr_rnum = mlxp->mlx_tx_groups[0].mlg_nwqs; 1064 cap_rings->mr_rget = mlxcx_mac_fill_tx_ring; 1065 cap_rings->mr_gget = NULL; 1066 cap_rings->mr_gaddring = NULL; 1067 cap_rings->mr_gremring = NULL; 1068 break; 1069 case MAC_RING_TYPE_RX: 1070 cap_rings->mr_gnum = mlxp->mlx_rx_ngroups; 1071 for (i = 0; i < mlxp->mlx_rx_ngroups; ++i) 1072 n += mlxp->mlx_rx_groups[i].mlg_nwqs; 1073 cap_rings->mr_rnum = n; 1074 cap_rings->mr_rget = mlxcx_mac_fill_rx_ring; 1075 cap_rings->mr_gget = mlxcx_mac_fill_rx_group; 1076 cap_rings->mr_gaddring = NULL; 1077 cap_rings->mr_gremring = NULL; 1078 break; 1079 default: 1080 return (B_FALSE); 1081 } 1082 break; 1083 1084 case MAC_CAPAB_HCKSUM: 1085 if (mlxp->mlx_caps->mlc_checksum) { 1086 *(uint32_t *)cap_data = HCKSUM_INET_FULL_V4 | 1087 HCKSUM_INET_FULL_V6 | HCKSUM_IPHDRCKSUM; 1088 } 1089 break; 1090 1091 case MAC_CAPAB_LED: 1092 cap_leds = cap_data; 1093 1094 cap_leds->mcl_flags = 0; 1095 cap_leds->mcl_modes = MAC_LED_DEFAULT | MAC_LED_OFF | 1096 MAC_LED_IDENT; 1097 cap_leds->mcl_set = mlxcx_mac_led_set; 1098 break; 1099 1100 case MAC_CAPAB_TRANSCEIVER: 1101 cap_txr = cap_data; 1102 1103 cap_txr->mct_flags = 0; 1104 cap_txr->mct_ntransceivers = 1; 1105 cap_txr->mct_info = mlxcx_mac_txr_info; 1106 cap_txr->mct_read = mlxcx_mac_txr_read; 1107 break; 1108 1109 default: 1110 return (B_FALSE); 1111 } 1112 1113 return (B_TRUE); 1114 } 1115 1116 static void 1117 mlxcx_mac_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1118 mac_prop_info_handle_t prh) 1119 { 1120 mlxcx_t *mlxp = (mlxcx_t *)arg; 1121 mlxcx_port_t *port = &mlxp->mlx_ports[0]; 1122 1123 mutex_enter(&port->mlp_mtx); 1124 1125 switch (pr_num) { 1126 case MAC_PROP_DUPLEX: 1127 case MAC_PROP_SPEED: 1128 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1129 break; 1130 case MAC_PROP_MTU: 1131 mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW); 1132 mac_prop_info_set_range_uint32(prh, MLXCX_MTU_OFFSET, 1133 port->mlp_max_mtu); 1134 mac_prop_info_set_default_uint32(prh, 1135 port->mlp_mtu - MLXCX_MTU_OFFSET); 1136 break; 1137 case MAC_PROP_AUTONEG: 1138 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1139 mac_prop_info_set_default_uint8(prh, 1); 1140 break; 1141 case MAC_PROP_ADV_FEC_CAP: 1142 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1143 mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO); 1144 break; 1145 case MAC_PROP_EN_FEC_CAP: 1146 mac_prop_info_set_perm(prh, MAC_PROP_PERM_RW); 1147 mac_prop_info_set_default_fec(prh, LINK_FEC_AUTO); 1148 break; 1149 case MAC_PROP_ADV_100GFDX_CAP: 1150 case MAC_PROP_EN_100GFDX_CAP: 1151 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1152 mac_prop_info_set_default_uint8(prh, 1153 (port->mlp_oper_proto & MLXCX_PROTO_100G) != 0); 1154 break; 1155 case MAC_PROP_ADV_50GFDX_CAP: 1156 case MAC_PROP_EN_50GFDX_CAP: 1157 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1158 mac_prop_info_set_default_uint8(prh, 1159 (port->mlp_oper_proto & MLXCX_PROTO_50G) != 0); 1160 break; 1161 case MAC_PROP_ADV_40GFDX_CAP: 1162 case MAC_PROP_EN_40GFDX_CAP: 1163 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1164 mac_prop_info_set_default_uint8(prh, 1165 (port->mlp_oper_proto & MLXCX_PROTO_40G) != 0); 1166 break; 1167 case MAC_PROP_ADV_25GFDX_CAP: 1168 case MAC_PROP_EN_25GFDX_CAP: 1169 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1170 mac_prop_info_set_default_uint8(prh, 1171 (port->mlp_oper_proto & MLXCX_PROTO_25G) != 0); 1172 break; 1173 case MAC_PROP_ADV_10GFDX_CAP: 1174 case MAC_PROP_EN_10GFDX_CAP: 1175 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1176 mac_prop_info_set_default_uint8(prh, 1177 (port->mlp_oper_proto & MLXCX_PROTO_10G) != 0); 1178 break; 1179 case MAC_PROP_ADV_1000FDX_CAP: 1180 case MAC_PROP_EN_1000FDX_CAP: 1181 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1182 mac_prop_info_set_default_uint8(prh, 1183 (port->mlp_oper_proto & MLXCX_PROTO_1G) != 0); 1184 break; 1185 case MAC_PROP_ADV_100FDX_CAP: 1186 case MAC_PROP_EN_100FDX_CAP: 1187 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 1188 mac_prop_info_set_default_uint8(prh, 1189 (port->mlp_oper_proto & MLXCX_PROTO_100M) != 0); 1190 break; 1191 default: 1192 break; 1193 } 1194 1195 mutex_exit(&port->mlp_mtx); 1196 } 1197 1198 static int 1199 mlxcx_mac_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1200 uint_t pr_valsize, const void *pr_val) 1201 { 1202 mlxcx_t *mlxp = (mlxcx_t *)arg; 1203 mlxcx_port_t *port = &mlxp->mlx_ports[0]; 1204 int ret = 0; 1205 uint32_t new_mtu, new_hw_mtu, old_mtu; 1206 mlxcx_buf_shard_t *sh; 1207 boolean_t allocd = B_FALSE; 1208 boolean_t relink = B_FALSE; 1209 link_fec_t fec; 1210 mlxcx_pplm_fec_caps_t cap_fec; 1211 1212 mutex_enter(&port->mlp_mtx); 1213 1214 switch (pr_num) { 1215 case MAC_PROP_MTU: 1216 bcopy(pr_val, &new_mtu, sizeof (new_mtu)); 1217 new_hw_mtu = new_mtu + MLXCX_MTU_OFFSET; 1218 if (new_hw_mtu == port->mlp_mtu) 1219 break; 1220 if (new_hw_mtu > port->mlp_max_mtu) { 1221 ret = EINVAL; 1222 break; 1223 } 1224 sh = list_head(&mlxp->mlx_buf_shards); 1225 for (; sh != NULL; sh = list_next(&mlxp->mlx_buf_shards, sh)) { 1226 mutex_enter(&sh->mlbs_mtx); 1227 if (!list_is_empty(&sh->mlbs_free) || 1228 !list_is_empty(&sh->mlbs_busy) || 1229 !list_is_empty(&sh->mlbs_loaned)) { 1230 allocd = B_TRUE; 1231 mutex_exit(&sh->mlbs_mtx); 1232 break; 1233 } 1234 mutex_exit(&sh->mlbs_mtx); 1235 } 1236 if (allocd) { 1237 ret = EBUSY; 1238 break; 1239 } 1240 old_mtu = port->mlp_mtu; 1241 ret = mac_maxsdu_update(mlxp->mlx_mac_hdl, new_mtu); 1242 if (ret != 0) 1243 break; 1244 port->mlp_mtu = new_hw_mtu; 1245 if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, port, 1246 MLXCX_MODIFY_NIC_VPORT_CTX_MTU)) { 1247 port->mlp_mtu = old_mtu; 1248 (void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu); 1249 ret = EIO; 1250 break; 1251 } 1252 if (!mlxcx_cmd_set_port_mtu(mlxp, port)) { 1253 port->mlp_mtu = old_mtu; 1254 (void) mac_maxsdu_update(mlxp->mlx_mac_hdl, old_mtu); 1255 ret = EIO; 1256 break; 1257 } 1258 break; 1259 1260 case MAC_PROP_EN_FEC_CAP: 1261 bcopy(pr_val, &fec, sizeof (fec)); 1262 if (!mlxcx_link_fec_cap(fec, &cap_fec)) { 1263 ret = EINVAL; 1264 break; 1265 } 1266 1267 /* 1268 * Don't change the FEC if it is already at the requested 1269 * setting AND the port is up. 1270 * When the port is down, always set the FEC and attempt 1271 * to retrain the link. 1272 */ 1273 if (fec == port->mlp_fec_requested && 1274 fec == mlxcx_fec_to_link_fec(port->mlp_fec_active) && 1275 port->mlp_oper_status != MLXCX_PORT_STATUS_DOWN) 1276 break; 1277 1278 /* 1279 * The most like cause of this failing is an invalid 1280 * or unsupported fec option. 1281 */ 1282 if (!mlxcx_cmd_modify_port_fec(mlxp, port, cap_fec)) { 1283 ret = EINVAL; 1284 break; 1285 } 1286 1287 port->mlp_fec_requested = fec; 1288 1289 /* 1290 * For FEC to become effective, the link needs to go back 1291 * to training and negotiation state. This happens when 1292 * the link transitions from down to up, force a relink. 1293 */ 1294 relink = B_TRUE; 1295 break; 1296 1297 default: 1298 ret = ENOTSUP; 1299 break; 1300 } 1301 1302 if (relink) { 1303 if (!mlxcx_cmd_modify_port_status(mlxp, port, 1304 MLXCX_PORT_STATUS_DOWN) || 1305 !mlxcx_cmd_modify_port_status(mlxp, port, 1306 MLXCX_PORT_STATUS_UP)) { 1307 ret = EIO; 1308 } 1309 } 1310 mutex_exit(&port->mlp_mtx); 1311 1312 return (ret); 1313 } 1314 1315 static int 1316 mlxcx_mac_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 1317 uint_t pr_valsize, void *pr_val) 1318 { 1319 mlxcx_t *mlxp = (mlxcx_t *)arg; 1320 mlxcx_port_t *port = &mlxp->mlx_ports[0]; 1321 uint64_t speed; 1322 int ret = 0; 1323 1324 mutex_enter(&port->mlp_mtx); 1325 1326 switch (pr_num) { 1327 case MAC_PROP_DUPLEX: 1328 if (pr_valsize < sizeof (link_duplex_t)) { 1329 ret = EOVERFLOW; 1330 break; 1331 } 1332 /* connectx parts only support full duplex */ 1333 *(link_duplex_t *)pr_val = LINK_DUPLEX_FULL; 1334 break; 1335 case MAC_PROP_SPEED: 1336 if (pr_valsize < sizeof (uint64_t)) { 1337 ret = EOVERFLOW; 1338 break; 1339 } 1340 speed = mlxcx_speed_to_bits(port->mlp_oper_proto); 1341 bcopy(&speed, pr_val, sizeof (speed)); 1342 break; 1343 case MAC_PROP_STATUS: 1344 if (pr_valsize < sizeof (link_state_t)) { 1345 ret = EOVERFLOW; 1346 break; 1347 } 1348 switch (port->mlp_oper_status) { 1349 case MLXCX_PORT_STATUS_UP: 1350 case MLXCX_PORT_STATUS_UP_ONCE: 1351 *(link_state_t *)pr_val = LINK_STATE_UP; 1352 break; 1353 case MLXCX_PORT_STATUS_DOWN: 1354 *(link_state_t *)pr_val = LINK_STATE_DOWN; 1355 break; 1356 default: 1357 *(link_state_t *)pr_val = LINK_STATE_UNKNOWN; 1358 } 1359 break; 1360 case MAC_PROP_AUTONEG: 1361 if (pr_valsize < sizeof (uint8_t)) { 1362 ret = EOVERFLOW; 1363 break; 1364 } 1365 *(uint8_t *)pr_val = port->mlp_autoneg; 1366 break; 1367 case MAC_PROP_ADV_FEC_CAP: 1368 if (pr_valsize < sizeof (link_fec_t)) { 1369 ret = EOVERFLOW; 1370 break; 1371 } 1372 *(link_fec_t *)pr_val = 1373 mlxcx_fec_to_link_fec(port->mlp_fec_active); 1374 break; 1375 case MAC_PROP_EN_FEC_CAP: 1376 if (pr_valsize < sizeof (link_fec_t)) { 1377 ret = EOVERFLOW; 1378 break; 1379 } 1380 *(link_fec_t *)pr_val = port->mlp_fec_requested; 1381 break; 1382 case MAC_PROP_MTU: 1383 if (pr_valsize < sizeof (uint32_t)) { 1384 ret = EOVERFLOW; 1385 break; 1386 } 1387 *(uint32_t *)pr_val = port->mlp_mtu - MLXCX_MTU_OFFSET; 1388 break; 1389 case MAC_PROP_ADV_100GFDX_CAP: 1390 case MAC_PROP_EN_100GFDX_CAP: 1391 if (pr_valsize < sizeof (uint8_t)) { 1392 ret = EOVERFLOW; 1393 break; 1394 } 1395 *(uint8_t *)pr_val = (port->mlp_max_proto & 1396 MLXCX_PROTO_100G) != 0; 1397 break; 1398 case MAC_PROP_ADV_50GFDX_CAP: 1399 case MAC_PROP_EN_50GFDX_CAP: 1400 if (pr_valsize < sizeof (uint8_t)) { 1401 ret = EOVERFLOW; 1402 break; 1403 } 1404 *(uint8_t *)pr_val = (port->mlp_max_proto & 1405 MLXCX_PROTO_50G) != 0; 1406 break; 1407 case MAC_PROP_ADV_40GFDX_CAP: 1408 case MAC_PROP_EN_40GFDX_CAP: 1409 if (pr_valsize < sizeof (uint8_t)) { 1410 ret = EOVERFLOW; 1411 break; 1412 } 1413 *(uint8_t *)pr_val = (port->mlp_max_proto & 1414 MLXCX_PROTO_40G) != 0; 1415 break; 1416 case MAC_PROP_ADV_25GFDX_CAP: 1417 case MAC_PROP_EN_25GFDX_CAP: 1418 if (pr_valsize < sizeof (uint8_t)) { 1419 ret = EOVERFLOW; 1420 break; 1421 } 1422 *(uint8_t *)pr_val = (port->mlp_max_proto & 1423 MLXCX_PROTO_25G) != 0; 1424 break; 1425 case MAC_PROP_ADV_10GFDX_CAP: 1426 case MAC_PROP_EN_10GFDX_CAP: 1427 if (pr_valsize < sizeof (uint8_t)) { 1428 ret = EOVERFLOW; 1429 break; 1430 } 1431 *(uint8_t *)pr_val = (port->mlp_max_proto & 1432 MLXCX_PROTO_10G) != 0; 1433 break; 1434 case MAC_PROP_ADV_1000FDX_CAP: 1435 case MAC_PROP_EN_1000FDX_CAP: 1436 if (pr_valsize < sizeof (uint8_t)) { 1437 ret = EOVERFLOW; 1438 break; 1439 } 1440 *(uint8_t *)pr_val = (port->mlp_max_proto & 1441 MLXCX_PROTO_1G) != 0; 1442 break; 1443 case MAC_PROP_ADV_100FDX_CAP: 1444 case MAC_PROP_EN_100FDX_CAP: 1445 if (pr_valsize < sizeof (uint8_t)) { 1446 ret = EOVERFLOW; 1447 break; 1448 } 1449 *(uint8_t *)pr_val = (port->mlp_max_proto & 1450 MLXCX_PROTO_100M) != 0; 1451 break; 1452 default: 1453 ret = ENOTSUP; 1454 break; 1455 } 1456 1457 mutex_exit(&port->mlp_mtx); 1458 1459 return (ret); 1460 } 1461 1462 #define MLXCX_MAC_CALLBACK_FLAGS \ 1463 (MC_GETCAPAB | MC_GETPROP | MC_PROPINFO | MC_SETPROP) 1464 1465 static mac_callbacks_t mlxcx_mac_callbacks = { 1466 .mc_callbacks = MLXCX_MAC_CALLBACK_FLAGS, 1467 .mc_getstat = mlxcx_mac_stat, 1468 .mc_start = mlxcx_mac_start, 1469 .mc_stop = mlxcx_mac_stop, 1470 .mc_setpromisc = mlxcx_mac_setpromisc, 1471 .mc_multicst = mlxcx_mac_multicast, 1472 .mc_ioctl = NULL, 1473 .mc_getcapab = mlxcx_mac_getcapab, 1474 .mc_setprop = mlxcx_mac_setprop, 1475 .mc_getprop = mlxcx_mac_getprop, 1476 .mc_propinfo = mlxcx_mac_propinfo, 1477 .mc_tx = NULL, 1478 .mc_unicst = NULL, 1479 }; 1480 1481 boolean_t 1482 mlxcx_register_mac(mlxcx_t *mlxp) 1483 { 1484 mac_register_t *mac = mac_alloc(MAC_VERSION); 1485 mlxcx_port_t *port; 1486 int ret; 1487 1488 if (mac == NULL) 1489 return (B_FALSE); 1490 1491 VERIFY3U(mlxp->mlx_nports, ==, 1); 1492 port = &mlxp->mlx_ports[0]; 1493 1494 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1495 mac->m_driver = mlxp; 1496 mac->m_dip = mlxp->mlx_dip; 1497 mac->m_src_addr = port->mlp_mac_address; 1498 mac->m_callbacks = &mlxcx_mac_callbacks; 1499 mac->m_min_sdu = MLXCX_MTU_OFFSET; 1500 mac->m_max_sdu = port->mlp_mtu - MLXCX_MTU_OFFSET; 1501 mac->m_margin = VLAN_TAGSZ; 1502 mac->m_priv_props = mlxcx_priv_props; 1503 mac->m_v12n = MAC_VIRT_LEVEL1; 1504 1505 ret = mac_register(mac, &mlxp->mlx_mac_hdl); 1506 if (ret != 0) { 1507 mlxcx_warn(mlxp, "mac_register() returned %d", ret); 1508 } 1509 mac_free(mac); 1510 1511 mlxcx_update_link_state(mlxp, port); 1512 1513 return (ret == 0); 1514 } 1515