1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/conf.h> 28 #include <sys/devops.h> 29 #include <sys/kmem.h> 30 #include <sys/ksynch.h> 31 #include <sys/modctl.h> 32 #include <sys/stat.h> 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/mac_provider.h> 36 #include <sys/mac_ether.h> 37 38 #include <sys/ib/clients/eoib/eib_impl.h> 39 40 /* 41 * Declarations private to this file 42 */ 43 static void eib_rb_mac_start(eib_t *, eib_vnic_t *); 44 45 /* 46 * This set of routines are used to set/clear the condition that the 47 * caller is about to do something that affects the state of the nic. 48 * If there's already someone doing either a start or a stop (possibly 49 * due to the async handler, a plumb or a dlpi_open happening, or an 50 * unplumb or dlpi_close coming in), we wait until that's done. 51 */ 52 void 53 eib_mac_set_nic_state(eib_t *ss, uint_t flags) 54 { 55 eib_node_state_t *ns = ss->ei_node_state; 56 57 mutex_enter(&ns->ns_lock); 58 59 while ((ns->ns_nic_state & EIB_NIC_STARTING) || 60 (ns->ns_nic_state & EIB_NIC_STOPPING)) { 61 cv_wait(&ns->ns_cv, &ns->ns_lock); 62 } 63 ns->ns_nic_state |= flags; 64 65 mutex_exit(&ns->ns_lock); 66 } 67 68 void 69 eib_mac_clr_nic_state(eib_t *ss, uint_t flags) 70 { 71 eib_node_state_t *ns = ss->ei_node_state; 72 73 mutex_enter(&ns->ns_lock); 74 75 ns->ns_nic_state &= (~flags); 76 77 cv_broadcast(&ns->ns_cv); 78 mutex_exit(&ns->ns_lock); 79 } 80 81 void 82 eib_mac_upd_nic_state(eib_t *ss, uint_t clr_flags, uint_t set_flags) 83 { 84 eib_node_state_t *ns = ss->ei_node_state; 85 86 mutex_enter(&ns->ns_lock); 87 88 ns->ns_nic_state &= (~clr_flags); 89 ns->ns_nic_state |= set_flags; 90 91 cv_broadcast(&ns->ns_cv); 92 mutex_exit(&ns->ns_lock); 93 } 94 95 uint_t 96 eib_mac_get_nic_state(eib_t *ss) 97 { 98 eib_node_state_t *ns = ss->ei_node_state; 99 uint_t nic_state; 100 101 mutex_enter(&ns->ns_lock); 102 nic_state = ns->ns_nic_state; 103 mutex_exit(&ns->ns_lock); 104 105 return (nic_state); 106 } 107 108 void 109 eib_mac_link_state(eib_t *ss, link_state_t new_link_state, 110 boolean_t force) 111 { 112 eib_node_state_t *ns = ss->ei_node_state; 113 boolean_t state_changed = B_FALSE; 114 115 mutex_enter(&ns->ns_lock); 116 117 /* 118 * We track the link state only if the current link state is 119 * not unknown. Obviously therefore, the first calls to set 120 * the link state from eib_mac_start() have to pass an explicit 121 * 'force' flag to force the state change tracking. 122 */ 123 if (ns->ns_link_state != LINK_STATE_UNKNOWN) 124 force = B_TRUE; 125 126 if ((force) && (new_link_state != ns->ns_link_state)) { 127 ns->ns_link_state = new_link_state; 128 state_changed = B_TRUE; 129 } 130 mutex_exit(&ns->ns_lock); 131 132 if (state_changed) { 133 EIB_DPRINTF_DEBUG(ss->ei_instance, 134 "eib_mac_link_state: changing link state to %d", 135 new_link_state); 136 137 mac_link_update(ss->ei_mac_hdl, new_link_state); 138 } else { 139 EIB_DPRINTF_DEBUG(ss->ei_instance, 140 "eib_mac_link_state: link state already %d", 141 new_link_state); 142 } 143 } 144 145 void 146 eib_mac_link_up(eib_t *ss, boolean_t force) 147 { 148 eib_mac_link_state(ss, LINK_STATE_UP, force); 149 } 150 151 void 152 eib_mac_link_down(eib_t *ss, boolean_t force) 153 { 154 eib_mac_link_state(ss, LINK_STATE_DOWN, force); 155 } 156 157 int 158 eib_mac_start(eib_t *ss) 159 { 160 eib_vnic_t *vnic0 = NULL; 161 eib_login_data_t *ld; 162 int err; 163 164 /* 165 * Perform HCA related initializations 166 */ 167 if (eib_ibt_hca_init(ss) != EIB_E_SUCCESS) 168 goto start_fail; 169 170 /* 171 * Make sure port is up. Also record the port base lid if it's up. 172 */ 173 if (eib_mac_hca_portstate(ss, &ss->ei_props->ep_blid, 174 &err) != EIB_E_SUCCESS) { 175 goto start_fail; 176 } 177 178 /* 179 * Set up tx and rx buffer pools 180 */ 181 if (eib_rsrc_setup_bufs(ss, &err) != EIB_E_SUCCESS) 182 goto start_fail; 183 184 /* 185 * Set up admin qp for logins and logouts 186 */ 187 if (eib_adm_setup_qp(ss, &err) != EIB_E_SUCCESS) 188 goto start_fail; 189 190 /* 191 * Create the vnic for physlink (instance 0) 192 */ 193 if (eib_vnic_create(ss, 0, 0, &vnic0, &err) != EIB_E_SUCCESS) 194 goto start_fail; 195 196 /* 197 * Update the mac layer about the correct values for MTU and 198 * unicast MAC address. Note that we've already verified that the 199 * vhub mtu (plus the eoib encapsulation header) is not greater 200 * than our port mtu, so we can go ahead and report the vhub mtu 201 * (of vnic0) directly. 202 */ 203 ld = &(vnic0->vn_login_data); 204 (void) mac_maxsdu_update(ss->ei_mac_hdl, ld->ld_vhub_mtu); 205 mac_unicst_update(ss->ei_mac_hdl, ld->ld_assigned_mac); 206 207 /* 208 * Report that the link is up and ready 209 */ 210 eib_mac_link_up(ss, B_TRUE); 211 return (0); 212 213 start_fail: 214 eib_rb_mac_start(ss, vnic0); 215 eib_mac_link_down(ss, B_TRUE); 216 return (err); 217 } 218 219 void 220 eib_mac_stop(eib_t *ss) 221 { 222 eib_vnic_t *vnic; 223 link_state_t cur_link_state = ss->ei_node_state->ns_link_state; 224 int ndx; 225 226 /* 227 * Stopping an EoIB device instance is somewhat different from starting 228 * it. Between the time the device instance was started and the call to 229 * eib_m_stop() now, a number of vnics could've been created. All of 230 * these will need to be destroyed before we can stop the device. 231 */ 232 for (ndx = EIB_MAX_VNICS - 1; ndx >= 0; ndx--) { 233 if ((vnic = ss->ei_vnic[ndx]) != NULL) 234 eib_vnic_delete(ss, vnic); 235 } 236 237 /* 238 * And now, to undo the things we did in start (other than creation 239 * of vnics itself) 240 */ 241 eib_rb_mac_start(ss, NULL); 242 243 /* 244 * Now that we're completed stopped, there's no mac address assigned 245 * to us. Update the mac layer with this information. Note that we 246 * can let the old max mtu information remain as-is, since we're likely 247 * to get that same mtu on a later plumb. 248 */ 249 mac_unicst_update(ss->ei_mac_hdl, eib_zero_mac); 250 251 /* 252 * If our link state was up when the eib_m_stop() callback was called, 253 * we'll mark the link state as unknown now. Otherwise, we'll leave 254 * the link state as-is (down). 255 */ 256 if (cur_link_state == LINK_STATE_UP) 257 eib_mac_link_state(ss, LINK_STATE_UNKNOWN, B_TRUE); 258 } 259 260 int 261 eib_mac_multicast(eib_t *ss, boolean_t add, uint8_t *mcast_mac) 262 { 263 int ret = EIB_E_SUCCESS; 264 int err = 0; 265 266 /* 267 * If it's a broadcast group join, each vnic needs to and is always 268 * joined to the broadcast address, so we return success immediately. 269 * If it's a broadcast group leave, we fail immediately for the same 270 * reason as above. 271 */ 272 if (bcmp(mcast_mac, eib_broadcast_mac, ETHERADDRL) == 0) { 273 if (add) 274 return (0); 275 else 276 return (EINVAL); 277 } 278 279 if (ss->ei_vnic[0]) { 280 if (add) { 281 ret = eib_vnic_join_data_mcg(ss, ss->ei_vnic[0], 282 mcast_mac, B_FALSE, &err); 283 } else { 284 eib_vnic_leave_data_mcg(ss, ss->ei_vnic[0], mcast_mac); 285 ret = EIB_E_SUCCESS; 286 } 287 } 288 289 if (ret == EIB_E_SUCCESS) 290 return (0); 291 else 292 return (err); 293 } 294 295 int 296 eib_mac_promisc(eib_t *ss, boolean_t set) 297 { 298 int ret = EIB_E_SUCCESS; 299 int err = 0; 300 301 if (ss->ei_vnic[0]) { 302 if (set) { 303 ret = eib_vnic_join_data_mcg(ss, ss->ei_vnic[0], 304 eib_zero_mac, B_FALSE, &err); 305 } else { 306 eib_vnic_leave_data_mcg(ss, ss->ei_vnic[0], 307 eib_zero_mac); 308 ret = EIB_E_SUCCESS; 309 } 310 } 311 312 if (ret == EIB_E_SUCCESS) 313 return (0); 314 else 315 return (err); 316 } 317 318 int 319 eib_mac_tx(eib_t *ss, mblk_t *mp) 320 { 321 eib_ether_hdr_t evh; 322 eib_vnic_t *vnic = NULL; 323 eib_wqe_t *swqe = NULL; 324 boolean_t failed_vnic; 325 int found; 326 int ret; 327 328 /* 329 * Grab a send wqe. If we cannot get one, wake up a service 330 * thread to monitor the swqe status and let the mac layer know 331 * as soon as we have enough tx wqes to start the traffic again. 332 */ 333 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) { 334 EIB_DPRINTF_WARN(ss->ei_instance, "eib_mac_tx: " 335 "no swqe available, holding tx until resource " 336 "becomes available"); 337 eib_rsrc_txwqes_needed(ss); 338 return (EIB_E_FAILURE); 339 } 340 341 /* 342 * Determine dmac, smac and vlan information 343 */ 344 eib_data_parse_ether_hdr(mp, &evh); 345 346 /* 347 * Lookup the {smac, vlan} tuple in our vnic list. If it isn't 348 * there, this is obviously a new packet on a vnic/vlan that 349 * we haven't been informed about. So go ahead and file a request 350 * to create a new vnic. This is obviously not a clean thing to 351 * do - we should be informed when a vnic/vlan is being created 352 * and should be given a proper opportunity to login to the gateway 353 * and do the creation. But we don't have that luxury now, and 354 * this is the next best thing to do. Note that we return failure 355 * from here, so tx flow control should prevent further packets 356 * from coming in until the vnic creation has completed. 357 */ 358 found = eib_data_lookup_vnic(ss, evh.eh_smac, evh.eh_vlan, &vnic, 359 &failed_vnic); 360 if (found != EIB_E_SUCCESS) { 361 uint8_t *m = evh.eh_smac; 362 363 /* 364 * Return the swqe back to the pool 365 */ 366 eib_rsrc_return_swqe(ss, swqe, NULL); 367 368 /* 369 * If we had previously tried creating this vnic and had 370 * failed, we'll simply drop the packets on this vnic. 371 * Otherwise, we'll queue up a request to create this vnic. 372 */ 373 if (failed_vnic) { 374 EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_mac_tx: " 375 "vnic creation for mac=%x:%x:%x:%x:%x:%x " 376 "vlan=0x%x failed previously, dropping pkt", 377 m[0], m[1], m[2], m[3], m[4], m[5], evh.eh_vlan); 378 return (EIB_E_SUCCESS); 379 } else { 380 eib_vnic_need_new(ss, evh.eh_smac, evh.eh_vlan); 381 return (EIB_E_FAILURE); 382 } 383 } 384 385 /* 386 * We'll try to setup the destination in the swqe for this dmac 387 * and vlan. If we don't succeed, there's no need to undo any 388 * vnic-creation we might've made above (if we didn't find the 389 * vnic corresponding to the {smac, vlan} originally). Note that 390 * this is not a resource issue, so we'll issue a warning and 391 * drop the packet, but won't return failure from here. 392 */ 393 ret = eib_vnic_setup_dest(vnic, swqe, evh.eh_dmac, evh.eh_vlan); 394 if (ret != EIB_E_SUCCESS) { 395 uint8_t *dmac; 396 397 dmac = evh.eh_dmac; 398 EIB_DPRINTF_WARN(ss->ei_instance, "eib_mac_tx: " 399 "eib_vnic_setup_dest() failed for mac=%x:%x:%x:%x:%x:%x, " 400 "vlan=0x%x, dropping pkt", dmac[0], dmac[1], dmac[2], 401 dmac[3], dmac[4], dmac[5]); 402 403 eib_rsrc_return_swqe(ss, swqe, NULL); 404 return (EIB_E_SUCCESS); 405 } 406 407 /* 408 * The only reason why this would fail is if we needed LSO buffer(s) 409 * to prepare this frame and couldn't find enough of those. 410 */ 411 ret = eib_data_prepare_frame(vnic, swqe, mp, &evh); 412 if (ret != EIB_E_SUCCESS) { 413 EIB_DPRINTF_WARN(ss->ei_instance, "eib_mac_tx: " 414 "eib_data_prepare_frame() failed (no LSO bufs?), " 415 "holding tx until resource becomes available"); 416 417 eib_rsrc_return_swqe(ss, swqe, NULL); 418 eib_rsrc_lsobufs_needed(ss); 419 return (EIB_E_FAILURE); 420 } 421 422 eib_data_post_tx(vnic, swqe); 423 424 return (EIB_E_SUCCESS); 425 } 426 427 int 428 eib_mac_hca_portstate(eib_t *ss, ib_lid_t *blid, int *err) 429 { 430 ibt_hca_portinfo_t *pi; 431 ibt_status_t ret; 432 uint_t num_pi; 433 uint_t sz_pi; 434 435 ret = ibt_query_hca_ports(ss->ei_hca_hdl, ss->ei_props->ep_port_num, 436 &pi, &num_pi, &sz_pi); 437 if (ret != IBT_SUCCESS) { 438 EIB_DPRINTF_ERR(ss->ei_instance, 439 "ibt_query_hca_ports(hca_hdl=0x%llx, " 440 "port=0x%x) failed, ret=%d", ss->ei_hca_hdl, 441 ss->ei_props->ep_port_num, ret); 442 goto mac_hca_portstate_fail; 443 } 444 if (num_pi != 1) { 445 EIB_DPRINTF_ERR(ss->ei_instance, 446 "ibt_query_hca_ports(hca_hdl=0x%llx, " 447 "port=0x%x) returned num_pi=%d", ss->ei_hca_hdl, 448 ss->ei_props->ep_port_num, num_pi); 449 goto mac_hca_portstate_fail; 450 } 451 452 if (pi->p_linkstate != IBT_PORT_ACTIVE) 453 goto mac_hca_portstate_fail; 454 455 /* 456 * Return the port's base lid if asked 457 */ 458 if (blid) { 459 *blid = pi->p_base_lid; 460 } 461 462 ibt_free_portinfo(pi, sz_pi); 463 return (EIB_E_SUCCESS); 464 465 mac_hca_portstate_fail: 466 if (pi) { 467 ibt_free_portinfo(pi, sz_pi); 468 } 469 if (err) { 470 *err = ENETDOWN; 471 } 472 return (EIB_E_FAILURE); 473 } 474 475 static void 476 eib_rb_mac_start(eib_t *ss, eib_vnic_t *vnic0) 477 { 478 int ntries; 479 480 /* 481 * If vnic0 is non-null, delete it 482 */ 483 if (vnic0) { 484 eib_rb_vnic_create(ss, vnic0, ~0); 485 } 486 487 /* 488 * At this point, we're pretty much done with all communication that 489 * we need to do for vnic-logout, etc. so we can get rid of any address 490 * vectors we might've allocated to send control/data packets. 491 */ 492 eib_ibt_free_avects(ss); 493 494 /* 495 * Tear down the rest of it 496 */ 497 if (ss->ei_admin_chan) { 498 eib_rb_adm_setup_qp(ss); 499 } 500 501 /* 502 * If (say) the network layer has been holding onto our rx buffers, we 503 * wait a reasonable time for it to hand them back to us. If we don't 504 * get it still, we have nothing to do but avoid rolling back hca init 505 * since we cannot unregister the memory, release the pd or close the 506 * hca. We'll try to reuse it if there's a plumb again. 507 */ 508 for (ntries = 0; ntries < EIB_MAX_ATTEMPTS; ntries++) { 509 eib_rb_rsrc_setup_bufs(ss, B_FALSE); 510 if ((ss->ei_tx == NULL) && (ss->ei_rx == NULL) && 511 (ss->ei_lso == NULL)) { 512 break; 513 } 514 515 delay(drv_usectohz(EIB_DELAY_HALF_SECOND)); 516 } 517 518 if (ntries == EIB_MAX_ATTEMPTS) { 519 EIB_DPRINTF_WARN(ss->ei_instance, "eib_rb_mac_start: " 520 "bufs outstanding, tx=0x%llx, rx=0x%llx, lso=0x%llx", 521 ss->ei_tx, ss->ei_rx, ss->ei_lso); 522 } else if (ss->ei_hca_hdl) { 523 eib_rb_ibt_hca_init(ss, ~0); 524 } 525 ss->ei_props->ep_blid = 0; 526 527 /* 528 * Pending vnic creation requests (and failed-vnic records) will have 529 * to be cleaned up in any case 530 */ 531 eib_flush_vnic_reqs(ss); 532 } 533