1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Data-Link Driver 30 */ 31 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/strsubr.h> 35 #include <sys/atomic.h> 36 #include <sys/mkdev.h> 37 #include <sys/vlan.h> 38 #include <sys/dld.h> 39 #include <sys/dld_impl.h> 40 #include <sys/dls_impl.h> 41 #include <inet/common.h> 42 43 static int str_constructor(void *, void *, int); 44 static void str_destructor(void *, void *); 45 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); 46 static void str_notify_promisc_on_phys(dld_str_t *); 47 static void str_notify_promisc_off_phys(dld_str_t *); 48 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 49 static void str_notify_link_up(dld_str_t *); 50 static void str_notify_link_down(dld_str_t *); 51 static void str_notify_capab_reneg(dld_str_t *); 52 static void str_notify_speed(dld_str_t *, uint32_t); 53 static void str_notify(void *, mac_notify_type_t); 54 55 static void ioc_native(dld_str_t *, mblk_t *); 56 static void 
ioc_raw(dld_str_t *, mblk_t *); 57 static void ioc_fast(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static void str_mdata_raw_put(dld_str_t *, mblk_t *); 61 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t); 62 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *); 63 64 static uint32_t str_count; 65 static kmem_cache_t *str_cachep; 66 static uint32_t minor_count; 67 static mod_hash_t *str_hashp; 68 69 #define STR_HASHSZ 64 70 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 71 72 /* 73 * Some notes on entry points, flow-control, queueing and locking: 74 * 75 * This driver exports the traditional STREAMS put entry point as well as 76 * the non-STREAMS fast-path transmit routine which is provided to IP via 77 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 78 * and data operations, while the fast-path routine deals only with M_DATA 79 * fast-path packets. Regardless of the entry point, all outbound packets 80 * will end up in dld_tx_single(), where they will be delivered to the MAC 81 * driver. 82 * 83 * The transmit logic operates in two modes: a "not busy" mode where the 84 * packets will be delivered to the MAC for a send attempt, or "busy" mode 85 * where they will be enqueued in the internal queue because of flow-control. 86 * Flow-control happens when the MAC driver indicates the packets couldn't 87 * be transmitted due to lack of resources (e.g. running out of descriptors). 88 * In such case, the driver will place a dummy message on its write-side 89 * STREAMS queue so that the queue is marked as "full". Any subsequent 90 * packets arriving at the driver will be enqueued in the internal queue, 91 * which is drained in the context of the service thread that gets scheduled 92 * whenever the driver is in the "busy" mode. 
When all packets have been
 * successfully delivered by MAC and the internal queue is empty, it will
 * transition to the "not busy" mode by removing the dummy message from the
 * write-side STREAMS queue; in effect this will trigger backenabling.
 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due
 * to the above reasons.
 *
 * The driver implements an internal transmit queue independent of STREAMS.
 * This allows for flexibility and provides a fast enqueue/dequeue mechanism
 * compared to the putq() and getq() STREAMS interfaces.  The only putq() and
 * getq() operations done by the driver are those related to placing and
 * removing the dummy message to/from the write-side STREAMS queue for flow-
 * control purposes.
 *
 * Locking is done independent of STREAMS due to the driver being fully MT.
 * Threads entering the driver (either from put or service entry points)
 * will most likely be readers, with the exception of a few writer cases
 * such as those handling DLPI attach/detach/bind/unbind/etc. or any of the
 * DLD-related ioctl requests.  The DLPI detach case is special, because
 * it involves freeing resources and therefore must be single-threaded.
 * Unfortunately the readers/writers lock can't be used to protect against
 * it, because the lock is dropped prior to the driver calling places where
 * putnext() may be invoked, and such places may depend on those resources
 * to exist.  Because of this, the driver always completes the DLPI detach
 * process when there are no other threads running in the driver.  This is
 * done by keeping track of the number of threads, such that the last
 * thread leaving the driver will finish the pending DLPI detach operation.
119 */ 120 121 /* 122 * dld_max_q_count is the queue depth threshold used to limit the number of 123 * outstanding packets or bytes allowed in the queue; once this limit is 124 * reached the driver will free any incoming ones until the queue depth 125 * drops below the threshold. 126 * 127 * This buffering is provided to accomodate clients which do not employ 128 * their own buffering scheme, and to handle occasional packet bursts. 129 * Clients which handle their own buffering will receive positive feedback 130 * from this driver as soon as it transitions into the "busy" state, i.e. 131 * when the queue is initially filled up; they will get backenabled once 132 * the queue is empty. 133 * 134 * The value chosen here is rather arbitrary; in future some intelligent 135 * heuristics may be involved which could take into account the hardware's 136 * transmit ring size, etc. 137 */ 138 uint_t dld_max_q_count = (16 * 1024 *1024); 139 140 /* 141 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular 142 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that 143 * match dev_t. If a stream is found and it is attached, its dev_info_t * 144 * is returned. 145 */ 146 typedef struct i_dld_str_state_s { 147 major_t ds_major; 148 minor_t ds_minor; 149 dev_info_t *ds_dip; 150 } i_dld_str_state_t; 151 152 /* ARGSUSED */ 153 static uint_t 154 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 155 { 156 i_dld_str_state_t *statep = arg; 157 dld_str_t *dsp = (dld_str_t *)val; 158 159 if (statep->ds_major != dsp->ds_major) 160 return (MH_WALK_CONTINUE); 161 162 ASSERT(statep->ds_minor != 0); 163 164 /* 165 * Access to ds_ppa and ds_mh need to be protected by ds_lock. 166 */ 167 rw_enter(&dsp->ds_lock, RW_READER); 168 if (statep->ds_minor <= DLD_MAX_MINOR) { 169 /* 170 * Style 1: minor can be derived from the ppa. we 171 * continue to walk until we find a matching stream 172 * in attached state. 
173 */ 174 if (statep->ds_minor == DLS_PPA2MINOR(dsp->ds_ppa) && 175 dsp->ds_mh != NULL) { 176 statep->ds_dip = mac_devinfo_get(dsp->ds_mh); 177 rw_exit(&dsp->ds_lock); 178 return (MH_WALK_TERMINATE); 179 } 180 } else { 181 /* 182 * Clone: a clone minor is unique. we can terminate the 183 * walk if we find a matching stream -- even if we fail 184 * to obtain the devinfo. 185 */ 186 if (statep->ds_minor == dsp->ds_minor) { 187 if (dsp->ds_mh != NULL) 188 statep->ds_dip = mac_devinfo_get(dsp->ds_mh); 189 rw_exit(&dsp->ds_lock); 190 return (MH_WALK_TERMINATE); 191 } 192 } 193 rw_exit(&dsp->ds_lock); 194 return (MH_WALK_CONTINUE); 195 } 196 197 static dev_info_t * 198 dld_finddevinfo(dev_t dev) 199 { 200 i_dld_str_state_t state; 201 202 state.ds_minor = getminor(dev); 203 state.ds_major = getmajor(dev); 204 state.ds_dip = NULL; 205 206 if (state.ds_minor == 0) 207 return (NULL); 208 209 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 210 if (state.ds_dip != NULL || state.ds_minor <= DLD_MAX_MINOR) 211 return (state.ds_dip); 212 213 /* See if it's a minor node of a VLAN */ 214 return (dls_finddevinfo(dev)); 215 } 216 217 /* 218 * devo_getinfo: getinfo(9e) 219 */ 220 /*ARGSUSED*/ 221 int 222 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 223 { 224 dev_info_t *devinfo; 225 minor_t minor = getminor((dev_t)arg); 226 int rc = DDI_FAILURE; 227 228 switch (cmd) { 229 case DDI_INFO_DEVT2DEVINFO: 230 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 231 *(dev_info_t **)resp = devinfo; 232 rc = DDI_SUCCESS; 233 } 234 break; 235 case DDI_INFO_DEVT2INSTANCE: 236 if (minor > 0 && minor <= DLD_MAX_MINOR) { 237 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor); 238 rc = DDI_SUCCESS; 239 } else if (minor > DLD_MAX_MINOR && 240 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 241 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); 242 rc = DDI_SUCCESS; 243 } 244 break; 245 } 246 return (rc); 247 } 248 249 /* 250 * qi_qopen: open(9e) 251 */ 252 
/*ARGSUSED*/ 253 int 254 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 255 { 256 dld_str_t *dsp; 257 major_t major; 258 minor_t minor; 259 int err; 260 261 if (sflag == MODOPEN) 262 return (ENOTSUP); 263 264 /* 265 * This is a cloning driver and therefore each queue should only 266 * ever get opened once. 267 */ 268 if (rq->q_ptr != NULL) 269 return (EBUSY); 270 271 major = getmajor(*devp); 272 minor = getminor(*devp); 273 274 /* 275 * Create a new dld_str_t for the stream. This will grab a new minor 276 * number that will be handed back in the cloned dev_t. Creation may 277 * fail if we can't allocate the dummy mblk used for flow-control. 278 */ 279 dsp = dld_str_create(rq, DLD_DLPI, major, 280 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 281 if (dsp == NULL) 282 return (ENOSR); 283 284 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 285 if (minor != 0) { 286 /* 287 * Style 1 open 288 */ 289 t_uscalar_t ppa; 290 291 if ((err = dls_ppa_from_minor(minor, &ppa)) != 0) 292 goto failed; 293 294 if ((err = dld_str_attach(dsp, ppa)) != 0) 295 goto failed; 296 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 297 } else { 298 (void) qassociate(rq, -1); 299 } 300 301 /* 302 * Enable the queue srv(9e) routine. 303 */ 304 qprocson(rq); 305 306 /* 307 * Construct a cloned dev_t to hand back. 308 */ 309 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 310 return (0); 311 312 failed: 313 dld_str_destroy(dsp); 314 return (err); 315 } 316 317 /* 318 * qi_qclose: close(9e) 319 */ 320 int 321 dld_close(queue_t *rq) 322 { 323 dld_str_t *dsp = rq->q_ptr; 324 325 /* 326 * Wait until pending requests are processed. 327 */ 328 mutex_enter(&dsp->ds_thr_lock); 329 while (dsp->ds_pending_cnt > 0) 330 cv_wait(&dsp->ds_pending_cv, &dsp->ds_thr_lock); 331 mutex_exit(&dsp->ds_thr_lock); 332 333 /* 334 * Disable the queue srv(9e) routine. 
335 */ 336 qprocsoff(rq); 337 338 /* 339 * At this point we can not be entered by any threads via STREAMS 340 * or the direct call interface, which is available only to IP. 341 * After the interface is unplumbed, IP wouldn't have any reference 342 * to this instance, and therefore we are now effectively single 343 * threaded and don't require any lock protection. Flush all 344 * pending packets which are sitting in the transmit queue. 345 */ 346 ASSERT(dsp->ds_thr == 0); 347 dld_tx_flush(dsp); 348 349 /* 350 * This stream was open to a provider node. Check to see 351 * if it has been cleanly shut down. 352 */ 353 if (dsp->ds_dlstate != DL_UNATTACHED) { 354 /* 355 * The stream is either open to a style 1 provider or 356 * this is not clean shutdown. Detach from the PPA. 357 * (This is still ok even in the style 1 case). 358 */ 359 dld_str_detach(dsp); 360 } 361 362 dld_str_destroy(dsp); 363 return (0); 364 } 365 366 /* 367 * qi_qputp: put(9e) 368 */ 369 void 370 dld_wput(queue_t *wq, mblk_t *mp) 371 { 372 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 373 374 DLD_ENTER(dsp); 375 376 switch (DB_TYPE(mp)) { 377 case M_DATA: 378 rw_enter(&dsp->ds_lock, RW_READER); 379 if (dsp->ds_dlstate != DL_IDLE || 380 dsp->ds_mode == DLD_UNITDATA) { 381 freemsg(mp); 382 } else if (dsp->ds_mode == DLD_FASTPATH) { 383 str_mdata_fastpath_put(dsp, mp); 384 } else if (dsp->ds_mode == DLD_RAW) { 385 str_mdata_raw_put(dsp, mp); 386 } 387 rw_exit(&dsp->ds_lock); 388 break; 389 case M_PROTO: 390 case M_PCPROTO: 391 dld_proto(dsp, mp); 392 break; 393 case M_IOCTL: 394 dld_ioc(dsp, mp); 395 break; 396 case M_FLUSH: 397 if (*mp->b_rptr & FLUSHW) { 398 dld_tx_flush(dsp); 399 *mp->b_rptr &= ~FLUSHW; 400 } 401 402 if (*mp->b_rptr & FLUSHR) { 403 qreply(wq, mp); 404 } else { 405 freemsg(mp); 406 } 407 break; 408 default: 409 freemsg(mp); 410 break; 411 } 412 413 DLD_EXIT(dsp); 414 } 415 416 /* 417 * qi_srvp: srv(9e) 418 */ 419 void 420 dld_wsrv(queue_t *wq) 421 { 422 mblk_t *mp; 423 dld_str_t *dsp = 
wq->q_ptr; 424 425 DLD_ENTER(dsp); 426 rw_enter(&dsp->ds_lock, RW_READER); 427 /* 428 * Grab all packets (chained via b_next) off our transmit queue 429 * and try to send them all to the MAC layer. Since the queue 430 * is independent of streams, we are able to dequeue all messages 431 * at once without looping through getq() and manually chaining 432 * them. Note that the queue size parameters (byte and message 433 * counts) are cleared as well, but we postpone the backenabling 434 * until after the MAC transmit since some packets may end up 435 * back at our transmit queue. 436 */ 437 mutex_enter(&dsp->ds_tx_list_lock); 438 if ((mp = dsp->ds_tx_list_head) == NULL) { 439 ASSERT(!dsp->ds_tx_qbusy); 440 ASSERT(dsp->ds_tx_flow_mp != NULL); 441 ASSERT(dsp->ds_tx_list_head == NULL); 442 ASSERT(dsp->ds_tx_list_tail == NULL); 443 ASSERT(dsp->ds_tx_cnt == 0); 444 ASSERT(dsp->ds_tx_msgcnt == 0); 445 mutex_exit(&dsp->ds_tx_list_lock); 446 rw_exit(&dsp->ds_lock); 447 DLD_EXIT(dsp); 448 return; 449 } 450 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 451 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 452 mutex_exit(&dsp->ds_tx_list_lock); 453 454 /* 455 * Discard packets unless we are attached and bound; note that 456 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 457 * because regardless of the mode all transmit will end up in 458 * dld_tx_single() where the packets may be queued. 459 */ 460 ASSERT(DB_TYPE(mp) == M_DATA); 461 if (dsp->ds_dlstate != DL_IDLE) { 462 freemsgchain(mp); 463 goto done; 464 } 465 466 /* 467 * Attempt to transmit one or more packets. If the MAC can't 468 * send them all, re-queue the packet(s) at the beginning of 469 * the transmit queue to avoid any re-ordering. 470 */ 471 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 472 dld_tx_enqueue(dsp, mp, B_TRUE); 473 474 done: 475 /* 476 * Grab the list lock again and check if the transmit queue is 477 * really empty; if so, lift up flow-control and backenable any 478 * writer queues. 
If the queue is not empty, schedule service 479 * thread to drain it. 480 */ 481 mutex_enter(&dsp->ds_tx_list_lock); 482 if (dsp->ds_tx_list_head == NULL) { 483 dsp->ds_tx_flow_mp = getq(wq); 484 ASSERT(dsp->ds_tx_flow_mp != NULL); 485 dsp->ds_tx_qbusy = B_FALSE; 486 } 487 mutex_exit(&dsp->ds_tx_list_lock); 488 489 rw_exit(&dsp->ds_lock); 490 DLD_EXIT(dsp); 491 } 492 493 void 494 dld_init_ops(struct dev_ops *ops, const char *name) 495 { 496 struct streamtab *stream; 497 struct qinit *rq, *wq; 498 struct module_info *modinfo; 499 500 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 501 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 502 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 503 modinfo->mi_minpsz = 0; 504 modinfo->mi_maxpsz = 64*1024; 505 modinfo->mi_hiwat = 1; 506 modinfo->mi_lowat = 0; 507 508 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 509 rq->qi_qopen = dld_open; 510 rq->qi_qclose = dld_close; 511 rq->qi_minfo = modinfo; 512 513 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 514 wq->qi_putp = (pfi_t)dld_wput; 515 wq->qi_srvp = (pfi_t)dld_wsrv; 516 wq->qi_minfo = modinfo; 517 518 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 519 stream->st_rdinit = rq; 520 stream->st_wrinit = wq; 521 ops->devo_cb_ops->cb_str = stream; 522 523 ops->devo_getinfo = &dld_getinfo; 524 } 525 526 void 527 dld_fini_ops(struct dev_ops *ops) 528 { 529 struct streamtab *stream; 530 struct qinit *rq, *wq; 531 struct module_info *modinfo; 532 533 stream = ops->devo_cb_ops->cb_str; 534 rq = stream->st_rdinit; 535 wq = stream->st_wrinit; 536 modinfo = rq->qi_minfo; 537 ASSERT(wq->qi_minfo == modinfo); 538 539 kmem_free(stream, sizeof (struct streamtab)); 540 kmem_free(wq, sizeof (struct qinit)); 541 kmem_free(rq, sizeof (struct qinit)); 542 kmem_free(modinfo->mi_idname, FMNAMESZ); 543 kmem_free(modinfo, sizeof (struct module_info)); 544 } 545 546 /* 547 * Initialize this module's data structures. 
548 */ 549 void 550 dld_str_init(void) 551 { 552 /* 553 * Create dld_str_t object cache. 554 */ 555 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 556 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 557 ASSERT(str_cachep != NULL); 558 559 /* 560 * Create a hash table for maintaining dld_str_t's. 561 * The ds_minor field (the clone minor number) of a dld_str_t 562 * is used as a key for this hash table because this number is 563 * globally unique (allocated from "dls_minor_arena"). 564 */ 565 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 566 mod_hash_null_valdtor); 567 } 568 569 /* 570 * Tear down this module's data structures. 571 */ 572 int 573 dld_str_fini(void) 574 { 575 /* 576 * Make sure that there are no objects in use. 577 */ 578 if (str_count != 0) 579 return (EBUSY); 580 581 /* 582 * Check to see if there are any minor numbers still in use. 583 */ 584 if (minor_count != 0) 585 return (EBUSY); 586 587 /* 588 * Destroy object cache. 589 */ 590 kmem_cache_destroy(str_cachep); 591 mod_hash_destroy_idhash(str_hashp); 592 return (0); 593 } 594 595 /* 596 * Create a new dld_str_t object. 597 */ 598 dld_str_t * 599 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 600 { 601 dld_str_t *dsp; 602 int err; 603 604 /* 605 * Allocate an object from the cache. 606 */ 607 atomic_add_32(&str_count, 1); 608 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 609 610 /* 611 * Allocate the dummy mblk for flow-control. 612 */ 613 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 614 if (dsp->ds_tx_flow_mp == NULL) { 615 kmem_cache_free(str_cachep, dsp); 616 atomic_add_32(&str_count, -1); 617 return (NULL); 618 } 619 dsp->ds_type = type; 620 dsp->ds_major = major; 621 dsp->ds_style = style; 622 623 /* 624 * Initialize the queue pointers. 
625 */ 626 ASSERT(RD(rq) == rq); 627 dsp->ds_rq = rq; 628 dsp->ds_wq = WR(rq); 629 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 630 631 /* 632 * We want explicit control over our write-side STREAMS queue 633 * where the dummy mblk gets added/removed for flow-control. 634 */ 635 noenable(WR(rq)); 636 637 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor), 638 (mod_hash_val_t)dsp); 639 ASSERT(err == 0); 640 return (dsp); 641 } 642 643 /* 644 * Destroy a dld_str_t object. 645 */ 646 void 647 dld_str_destroy(dld_str_t *dsp) 648 { 649 queue_t *rq; 650 queue_t *wq; 651 mod_hash_val_t val; 652 /* 653 * Clear the queue pointers. 654 */ 655 rq = dsp->ds_rq; 656 wq = dsp->ds_wq; 657 ASSERT(wq == WR(rq)); 658 659 rq->q_ptr = wq->q_ptr = NULL; 660 dsp->ds_rq = dsp->ds_wq = NULL; 661 662 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 663 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 664 ASSERT(dsp->ds_tx_list_head == NULL); 665 ASSERT(dsp->ds_tx_list_tail == NULL); 666 ASSERT(dsp->ds_tx_cnt == 0); 667 ASSERT(dsp->ds_tx_msgcnt == 0); 668 ASSERT(!dsp->ds_tx_qbusy); 669 670 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 671 ASSERT(dsp->ds_thr == 0); 672 ASSERT(dsp->ds_pending_req == NULL); 673 674 /* 675 * Reinitialize all the flags. 676 */ 677 dsp->ds_notifications = 0; 678 dsp->ds_passivestate = DLD_UNINITIALIZED; 679 dsp->ds_mode = DLD_UNITDATA; 680 dsp->ds_native = B_FALSE; 681 682 /* 683 * Free the dummy mblk if exists. 684 */ 685 if (dsp->ds_tx_flow_mp != NULL) { 686 freeb(dsp->ds_tx_flow_mp); 687 dsp->ds_tx_flow_mp = NULL; 688 } 689 690 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val); 691 ASSERT(dsp == (dld_str_t *)val); 692 693 /* 694 * Free the object back to the cache. 695 */ 696 kmem_cache_free(str_cachep, dsp); 697 atomic_add_32(&str_count, -1); 698 } 699 700 /* 701 * kmem_cache contructor function: see kmem_cache_create(9f). 
702 */ 703 /*ARGSUSED*/ 704 static int 705 str_constructor(void *buf, void *cdrarg, int kmflags) 706 { 707 dld_str_t *dsp = buf; 708 709 bzero(buf, sizeof (dld_str_t)); 710 711 /* 712 * Allocate a new minor number. 713 */ 714 atomic_add_32(&minor_count, 1); 715 if ((dsp->ds_minor = dls_minor_hold(kmflags == KM_SLEEP)) == 0) { 716 atomic_add_32(&minor_count, -1); 717 return (-1); 718 } 719 720 /* 721 * Initialize the DLPI state machine. 722 */ 723 dsp->ds_dlstate = DL_UNATTACHED; 724 dsp->ds_ppa = (t_uscalar_t)-1; 725 726 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 727 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 728 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 729 cv_init(&dsp->ds_pending_cv, NULL, CV_DRIVER, NULL); 730 731 return (0); 732 } 733 734 /* 735 * kmem_cache destructor function. 736 */ 737 /*ARGSUSED*/ 738 static void 739 str_destructor(void *buf, void *cdrarg) 740 { 741 dld_str_t *dsp = buf; 742 743 /* 744 * Make sure the DLPI state machine was reset. 745 */ 746 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 747 748 /* 749 * Make sure the data-link interface was closed. 750 */ 751 ASSERT(dsp->ds_mh == NULL); 752 ASSERT(dsp->ds_dc == NULL); 753 754 /* 755 * Make sure enabled notifications are cleared. 756 */ 757 ASSERT(dsp->ds_notifications == 0); 758 759 /* 760 * Make sure polling is disabled. 761 */ 762 ASSERT(!dsp->ds_polling); 763 764 /* 765 * Release the minor number. 
766 */ 767 dls_minor_rele(dsp->ds_minor); 768 atomic_add_32(&minor_count, -1); 769 770 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 771 rw_destroy(&dsp->ds_lock); 772 773 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 774 mutex_destroy(&dsp->ds_tx_list_lock); 775 ASSERT(dsp->ds_tx_flow_mp == NULL); 776 777 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 778 mutex_destroy(&dsp->ds_thr_lock); 779 ASSERT(dsp->ds_pending_req == NULL); 780 ASSERT(dsp->ds_pending_op == NULL); 781 ASSERT(dsp->ds_pending_cnt == 0); 782 cv_destroy(&dsp->ds_pending_cv); 783 } 784 785 /* 786 * M_DATA put. Note that mp is a single message, not a chained message. 787 */ 788 void 789 dld_tx_single(dld_str_t *dsp, mblk_t *mp) 790 { 791 /* 792 * This function can be called from within dld or from an upper 793 * layer protocol (currently only tcp). If we are in the busy 794 * mode enqueue the packet(s) and return. Otherwise hand them 795 * over to the MAC driver for transmission; any remaining one(s) 796 * which didn't get sent will be queued. 797 * 798 * Note here that we don't grab the list lock prior to checking 799 * the busy flag. This is okay, because a missed transition 800 * will not cause any packet reordering for any particular TCP 801 * connection (which is single-threaded). The enqueue routine 802 * will atomically set the busy flag and schedule the service 803 * thread to run; the flag is only cleared by the service thread 804 * when there is no more packet to be transmitted. 805 */ 806 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 807 dld_tx_enqueue(dsp, mp, B_FALSE); 808 } 809 810 /* 811 * Update the priority bits and VID (may need to insert tag if mp points 812 * to an untagged packet. 813 * If vid is VLAN_ID_NONE, use the VID encoded in the packet. 
814 */ 815 static mblk_t * 816 i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid) 817 { 818 mblk_t *hmp; 819 struct ether_vlan_header *evhp; 820 struct ether_header *ehp; 821 uint16_t old_tci = 0; 822 size_t len; 823 824 ASSERT(pri != 0 || vid != VLAN_ID_NONE); 825 826 evhp = (struct ether_vlan_header *)mp->b_rptr; 827 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 828 /* 829 * Tagged packet, update the priority bits. 830 */ 831 old_tci = ntohs(evhp->ether_tci); 832 len = sizeof (struct ether_vlan_header); 833 834 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) { 835 /* 836 * In case some drivers only check the db_ref 837 * count of the first mblk, we pullup the 838 * message into a single mblk. 839 */ 840 hmp = msgpullup(mp, -1); 841 if ((hmp == NULL) || (MBLKL(hmp) < len)) { 842 freemsg(hmp); 843 return (NULL); 844 } else { 845 freemsg(mp); 846 mp = hmp; 847 } 848 } 849 850 evhp = (struct ether_vlan_header *)mp->b_rptr; 851 } else { 852 /* 853 * Untagged packet. Insert the special priority tag. 854 * First allocate a header mblk. 855 */ 856 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 857 if (hmp == NULL) 858 return (NULL); 859 860 evhp = (struct ether_vlan_header *)hmp->b_rptr; 861 ehp = (struct ether_header *)mp->b_rptr; 862 863 /* 864 * Copy the MAC addresses and typelen 865 */ 866 bcopy(ehp, evhp, (ETHERADDRL * 2)); 867 evhp->ether_type = ehp->ether_type; 868 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 869 870 hmp->b_wptr += sizeof (struct ether_vlan_header); 871 mp->b_rptr += sizeof (struct ether_header); 872 873 /* 874 * Free the original message if it's now empty. Link the 875 * rest of messages to the header message. 
876 */ 877 if (MBLKL(mp) == 0) { 878 hmp->b_cont = mp->b_cont; 879 freeb(mp); 880 } else { 881 hmp->b_cont = mp; 882 } 883 mp = hmp; 884 } 885 886 if (pri == 0) 887 pri = VLAN_PRI(old_tci); 888 if (vid == VLAN_ID_NONE) 889 vid = VLAN_ID(old_tci); 890 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid)); 891 return (mp); 892 } 893 894 /* 895 * M_DATA put (IP fast-path mode) 896 */ 897 void 898 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 899 { 900 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 901 mblk_t *newmp; 902 uint_t pri; 903 904 if (is_ethernet) { 905 /* 906 * Update the priority bits to the assigned priority. 907 */ 908 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp); 909 910 if (pri != 0) { 911 newmp = i_dld_ether_header_update_tag(mp, pri, 912 VLAN_ID_NONE); 913 if (newmp == NULL) 914 goto discard; 915 mp = newmp; 916 } 917 } 918 919 dld_tx_single(dsp, mp); 920 return; 921 922 discard: 923 /* TODO: bump kstat? */ 924 freemsg(mp); 925 } 926 927 /* 928 * M_DATA put (DLIOCRAW mode) 929 */ 930 static void 931 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 932 { 933 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 934 mblk_t *bp, *newmp; 935 size_t size; 936 mac_header_info_t mhi; 937 uint_t pri, vid; 938 939 /* 940 * Certain MAC type plugins provide an illusion for raw DLPI 941 * consumers. They pretend that the MAC layer is something that 942 * it's not for the benefit of observability tools. For example, 943 * mac_wifi pretends that it's Ethernet for such consumers. 944 * Here, unless native mode is enabled, we call into the MAC layer so 945 * that this illusion can be maintained. The plugin will optionally 946 * transform the MAC header here into something that can be passed 947 * down. The header goes from raw mode to "cooked" mode. 
948 */ 949 if (!dsp->ds_native) { 950 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL) 951 goto discard; 952 mp = newmp; 953 } 954 955 size = MBLKL(mp); 956 957 /* 958 * Check the packet is not too big and that any remaining 959 * fragment list is composed entirely of M_DATA messages. (We 960 * know the first fragment was M_DATA otherwise we could not 961 * have got here). 962 */ 963 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 964 if (DB_TYPE(bp) != M_DATA) 965 goto discard; 966 size += MBLKL(bp); 967 } 968 969 if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0) 970 goto discard; 971 972 /* 973 * If LSO is enabled, check the size against lso_max. Otherwise, 974 * compare the packet size with sdu_max. 975 */ 976 if (size > (dsp->ds_lso ? dsp->ds_lso_max : dsp->ds_mip->mi_sdu_max) 977 + mhi.mhi_hdrsize) 978 goto discard; 979 980 if (is_ethernet) { 981 /* 982 * Discard the packet if this is a VLAN stream but the VID in 983 * the packet is not correct. 984 */ 985 vid = VLAN_ID(mhi.mhi_tci); 986 if ((dsp->ds_vid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE)) 987 goto discard; 988 989 /* 990 * Discard the packet if this packet is a tagged packet 991 * but both pri and VID are 0. 992 */ 993 pri = VLAN_PRI(mhi.mhi_tci); 994 if (mhi.mhi_istagged && (pri == 0) && (vid == VLAN_ID_NONE)) 995 goto discard; 996 997 /* 998 * Update the priority bits to the per-stream priority if 999 * priority is not set in the packet. Update the VID for 1000 * packets on a VLAN stream. 1001 */ 1002 pri = (pri == 0) ? dsp->ds_pri : 0; 1003 if ((pri != 0) || (dsp->ds_vid != VLAN_ID_NONE)) { 1004 if ((newmp = i_dld_ether_header_update_tag(mp, 1005 pri, dsp->ds_vid)) == NULL) { 1006 goto discard; 1007 } 1008 mp = newmp; 1009 } 1010 } 1011 1012 dld_tx_single(dsp, mp); 1013 return; 1014 1015 discard: 1016 /* TODO: bump kstat? */ 1017 freemsg(mp); 1018 } 1019 1020 /* 1021 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 
1022 */ 1023 int 1024 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 1025 { 1026 int err; 1027 const char *drvname; 1028 char name[MAXNAMELEN]; 1029 dls_channel_t dc; 1030 uint_t addr_length; 1031 1032 ASSERT(dsp->ds_dc == NULL); 1033 1034 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 1035 return (EINVAL); 1036 1037 (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); 1038 1039 if (strcmp(drvname, "aggr") != 0 && strcmp(drvname, "vnic") != 0 && 1040 qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 1041 return (EINVAL); 1042 1043 /* 1044 * Open a channel. 1045 */ 1046 if ((err = dls_open(name, &dc)) != 0) { 1047 (void) qassociate(dsp->ds_wq, -1); 1048 return (err); 1049 } 1050 1051 /* 1052 * Cache the MAC interface handle, a pointer to the immutable MAC 1053 * information and the current and 'factory' MAC address. 1054 */ 1055 dsp->ds_mh = dls_mac(dc); 1056 dsp->ds_mip = mac_info(dsp->ds_mh); 1057 1058 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1059 1060 addr_length = dsp->ds_mip->mi_addr_length; 1061 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 1062 1063 /* 1064 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 1065 * a non-VLAN interface). 1066 */ 1067 dsp->ds_vid = dls_vid(dc); 1068 1069 /* 1070 * Set the default packet priority. 1071 */ 1072 dsp->ds_pri = 0; 1073 1074 /* 1075 * Add a notify function so that the we get updates from the MAC. 1076 */ 1077 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 1078 1079 dsp->ds_ppa = ppa; 1080 dsp->ds_dc = dc; 1081 dsp->ds_dlstate = DL_UNBOUND; 1082 1083 return (0); 1084 } 1085 1086 /* 1087 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 1088 * from close(2) for style 2. 1089 */ 1090 void 1091 dld_str_detach(dld_str_t *dsp) 1092 { 1093 ASSERT(dsp->ds_thr == 0); 1094 1095 /* 1096 * Remove the notify function. 
1097 */ 1098 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 1099 1100 /* 1101 * Clear the polling and promisc flags. 1102 */ 1103 dsp->ds_polling = B_FALSE; 1104 dsp->ds_soft_ring = B_FALSE; 1105 dsp->ds_promisc = 0; 1106 1107 /* 1108 * Clear LSO flags. 1109 */ 1110 dsp->ds_lso = B_FALSE; 1111 dsp->ds_lso_max = 0; 1112 1113 /* 1114 * Close the channel. 1115 */ 1116 dls_close(dsp->ds_dc); 1117 dsp->ds_ppa = (t_uscalar_t)-1; 1118 dsp->ds_dc = NULL; 1119 dsp->ds_mh = NULL; 1120 1121 (void) qassociate(dsp->ds_wq, -1); 1122 1123 /* 1124 * Re-initialize the DLPI state machine. 1125 */ 1126 dsp->ds_dlstate = DL_UNATTACHED; 1127 1128 } 1129 1130 /* 1131 * This function is only called for VLAN streams. In raw mode, we strip VLAN 1132 * tags before sending packets up to the DLS clients, with the exception of 1133 * special priority tagged packets, in that case, we set the VID to 0. 1134 * mp must be a VLAN tagged packet. 1135 */ 1136 static mblk_t * 1137 i_dld_ether_header_strip_tag(mblk_t *mp) 1138 { 1139 mblk_t *newmp; 1140 struct ether_vlan_header *evhp; 1141 uint16_t tci, new_tci; 1142 1143 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 1144 if (DB_REF(mp) > 1) { 1145 newmp = copymsg(mp); 1146 if (newmp == NULL) 1147 return (NULL); 1148 freemsg(mp); 1149 mp = newmp; 1150 } 1151 evhp = (struct ether_vlan_header *)mp->b_rptr; 1152 1153 tci = ntohs(evhp->ether_tci); 1154 if (VLAN_PRI(tci) == 0) { 1155 /* 1156 * Priority is 0, strip the tag. 1157 */ 1158 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 1159 mp->b_rptr += VLAN_TAGSZ; 1160 } else { 1161 /* 1162 * Priority is not 0, update the VID to 0. 1163 */ 1164 new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE); 1165 evhp->ether_tci = htons(new_tci); 1166 } 1167 return (mp); 1168 } 1169 1170 /* 1171 * Raw mode receive function. 
1172 */ 1173 /*ARGSUSED*/ 1174 void 1175 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1176 mac_header_info_t *mhip) 1177 { 1178 dld_str_t *dsp = (dld_str_t *)arg; 1179 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 1180 mblk_t *next, *newmp; 1181 1182 ASSERT(mp != NULL); 1183 do { 1184 /* 1185 * Get the pointer to the next packet in the chain and then 1186 * clear b_next before the packet gets passed on. 1187 */ 1188 next = mp->b_next; 1189 mp->b_next = NULL; 1190 1191 /* 1192 * Wind back b_rptr to point at the MAC header. 1193 */ 1194 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1195 mp->b_rptr -= mhip->mhi_hdrsize; 1196 1197 /* 1198 * Certain MAC type plugins provide an illusion for raw 1199 * DLPI consumers. They pretend that the MAC layer is 1200 * something that it's not for the benefit of observability 1201 * tools. For example, mac_wifi pretends that it's Ethernet 1202 * for such consumers. Here, unless native mode is enabled, 1203 * we call into the MAC layer so that this illusion can be 1204 * maintained. The plugin will optionally transform the MAC 1205 * header here into something that can be passed up to raw 1206 * consumers. The header goes from "cooked" mode to raw mode. 1207 */ 1208 if (!dsp->ds_native) { 1209 newmp = mac_header_uncook(dsp->ds_mh, mp); 1210 if (newmp == NULL) { 1211 freemsg(mp); 1212 goto next; 1213 } 1214 mp = newmp; 1215 } 1216 1217 /* 1218 * Strip the VLAN tag for VLAN streams. 1219 */ 1220 if (is_ethernet && dsp->ds_vid != VLAN_ID_NONE) { 1221 newmp = i_dld_ether_header_strip_tag(mp); 1222 if (newmp == NULL) { 1223 freemsg(mp); 1224 goto next; 1225 } 1226 mp = newmp; 1227 } 1228 1229 /* 1230 * Pass the packet on. 1231 */ 1232 if (canputnext(dsp->ds_rq)) 1233 putnext(dsp->ds_rq, mp); 1234 else 1235 freemsg(mp); 1236 1237 next: 1238 /* 1239 * Move on to the next packet in the chain. 1240 */ 1241 mp = next; 1242 } while (mp != NULL); 1243 } 1244 1245 /* 1246 * Fast-path receive function. 
1247 */ 1248 /*ARGSUSED*/ 1249 void 1250 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1251 mac_header_info_t *mhip) 1252 { 1253 dld_str_t *dsp = (dld_str_t *)arg; 1254 mblk_t *next; 1255 size_t offset = 0; 1256 1257 /* 1258 * MAC header stripping rules: 1259 * - Tagged packets: 1260 * a. VLAN streams. Strip the whole VLAN header including the tag. 1261 * b. Physical streams 1262 * - VLAN packets (non-zero VID). The stream must be either a 1263 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener. 1264 * Strip the Ethernet header but keep the VLAN header. 1265 * - Special tagged packets (zero VID) 1266 * * The stream is either a DL_PROMISC_SAP listener or a 1267 * ETHERTYPE_VLAN listener, strip the Ethernet header but 1268 * keep the VLAN header. 1269 * * Otherwise, strip the whole VLAN header. 1270 * - Untagged packets. Strip the whole MAC header. 1271 */ 1272 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1273 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1274 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1275 offset = VLAN_TAGSZ; 1276 } 1277 1278 ASSERT(mp != NULL); 1279 do { 1280 /* 1281 * Get the pointer to the next packet in the chain and then 1282 * clear b_next before the packet gets passed on. 1283 */ 1284 next = mp->b_next; 1285 mp->b_next = NULL; 1286 1287 /* 1288 * Wind back b_rptr to point at the VLAN header. 1289 */ 1290 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset); 1291 mp->b_rptr -= offset; 1292 1293 /* 1294 * Pass the packet on. 1295 */ 1296 if (canputnext(dsp->ds_rq)) 1297 putnext(dsp->ds_rq, mp); 1298 else 1299 freemsg(mp); 1300 /* 1301 * Move on to the next packet in the chain. 1302 */ 1303 mp = next; 1304 } while (mp != NULL); 1305 } 1306 1307 /* 1308 * Default receive function (send DL_UNITDATA_IND messages). 
1309 */ 1310 /*ARGSUSED*/ 1311 void 1312 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1313 mac_header_info_t *mhip) 1314 { 1315 dld_str_t *dsp = (dld_str_t *)arg; 1316 mblk_t *ud_mp; 1317 mblk_t *next; 1318 size_t offset = 0; 1319 boolean_t strip_vlan = B_TRUE; 1320 1321 /* 1322 * See MAC header stripping rules in the dld_str_rx_fastpath() function. 1323 */ 1324 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1325 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1326 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1327 offset = VLAN_TAGSZ; 1328 strip_vlan = B_FALSE; 1329 } 1330 1331 ASSERT(mp != NULL); 1332 do { 1333 /* 1334 * Get the pointer to the next packet in the chain and then 1335 * clear b_next before the packet gets passed on. 1336 */ 1337 next = mp->b_next; 1338 mp->b_next = NULL; 1339 1340 /* 1341 * Wind back b_rptr to point at the MAC header. 1342 */ 1343 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1344 mp->b_rptr -= mhip->mhi_hdrsize; 1345 1346 /* 1347 * Create the DL_UNITDATA_IND M_PROTO. 1348 */ 1349 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) { 1350 freemsgchain(mp); 1351 return; 1352 } 1353 1354 /* 1355 * Advance b_rptr to point at the payload (or the VLAN header). 1356 */ 1357 mp->b_rptr += (mhip->mhi_hdrsize - offset); 1358 1359 /* 1360 * Prepend the DL_UNITDATA_IND. 1361 */ 1362 ud_mp->b_cont = mp; 1363 1364 /* 1365 * Send the message. 1366 */ 1367 if (canputnext(dsp->ds_rq)) 1368 putnext(dsp->ds_rq, ud_mp); 1369 else 1370 freemsg(ud_mp); 1371 1372 /* 1373 * Move on to the next packet in the chain. 1374 */ 1375 mp = next; 1376 } while (mp != NULL); 1377 } 1378 1379 /* 1380 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1381 * current state of the interface. 
1382 */ 1383 void 1384 dld_str_notify_ind(dld_str_t *dsp) 1385 { 1386 mac_notify_type_t type; 1387 1388 for (type = 0; type < MAC_NNOTE; type++) 1389 str_notify(dsp, type); 1390 } 1391 1392 typedef struct dl_unitdata_ind_wrapper { 1393 dl_unitdata_ind_t dl_unitdata; 1394 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1395 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1396 } dl_unitdata_ind_wrapper_t; 1397 1398 /* 1399 * Create a DL_UNITDATA_IND M_PROTO message. 1400 */ 1401 static mblk_t * 1402 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan) 1403 { 1404 mblk_t *nmp; 1405 dl_unitdata_ind_wrapper_t *dlwp; 1406 dl_unitdata_ind_t *dlp; 1407 mac_header_info_t mhi; 1408 uint_t addr_length; 1409 uint8_t *daddr; 1410 uint8_t *saddr; 1411 1412 /* 1413 * Get the packet header information. 1414 */ 1415 if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0) 1416 return (NULL); 1417 1418 /* 1419 * Allocate a message large enough to contain the wrapper structure 1420 * defined above. 1421 */ 1422 if ((nmp = mexchange(dsp->ds_wq, NULL, 1423 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1424 DL_UNITDATA_IND)) == NULL) 1425 return (NULL); 1426 1427 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1428 1429 dlp = &(dlwp->dl_unitdata); 1430 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1431 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1432 1433 /* 1434 * Copy in the destination address. 1435 */ 1436 addr_length = dsp->ds_mip->mi_addr_length; 1437 daddr = dlwp->dl_dest_addr; 1438 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1439 bcopy(mhi.mhi_daddr, daddr, addr_length); 1440 1441 /* 1442 * Set the destination DLSAP to the SAP value encoded in the packet. 
1443 */ 1444 if (mhi.mhi_istagged && !strip_vlan) 1445 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN; 1446 else 1447 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap; 1448 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1449 1450 /* 1451 * If the destination address was multicast or broadcast then the 1452 * dl_group_address field should be non-zero. 1453 */ 1454 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) || 1455 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST); 1456 1457 /* 1458 * Copy in the source address if one exists. Some MAC types (DL_IB 1459 * for example) may not have access to source information. 1460 */ 1461 if (mhi.mhi_saddr == NULL) { 1462 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0; 1463 } else { 1464 saddr = dlwp->dl_src_addr; 1465 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1466 bcopy(mhi.mhi_saddr, saddr, addr_length); 1467 1468 /* 1469 * Set the source DLSAP to the packet ethertype. 1470 */ 1471 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap; 1472 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1473 } 1474 1475 return (nmp); 1476 } 1477 1478 /* 1479 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1480 */ 1481 static void 1482 str_notify_promisc_on_phys(dld_str_t *dsp) 1483 { 1484 mblk_t *mp; 1485 dl_notify_ind_t *dlip; 1486 1487 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1488 return; 1489 1490 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1491 M_PROTO, 0)) == NULL) 1492 return; 1493 1494 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1495 dlip = (dl_notify_ind_t *)mp->b_rptr; 1496 dlip->dl_primitive = DL_NOTIFY_IND; 1497 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1498 1499 qreply(dsp->ds_wq, mp); 1500 } 1501 1502 /* 1503 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1504 */ 1505 static void 1506 str_notify_promisc_off_phys(dld_str_t *dsp) 1507 { 1508 mblk_t *mp; 1509 dl_notify_ind_t *dlip; 1510 1511 if (!(dsp->ds_notifications & 
DL_NOTE_PROMISC_OFF_PHYS)) 1512 return; 1513 1514 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1515 M_PROTO, 0)) == NULL) 1516 return; 1517 1518 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1519 dlip = (dl_notify_ind_t *)mp->b_rptr; 1520 dlip->dl_primitive = DL_NOTIFY_IND; 1521 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1522 1523 qreply(dsp->ds_wq, mp); 1524 } 1525 1526 /* 1527 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1528 */ 1529 static void 1530 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1531 { 1532 mblk_t *mp; 1533 dl_notify_ind_t *dlip; 1534 uint_t addr_length; 1535 uint16_t ethertype; 1536 1537 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1538 return; 1539 1540 addr_length = dsp->ds_mip->mi_addr_length; 1541 if ((mp = mexchange(dsp->ds_wq, NULL, 1542 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1543 M_PROTO, 0)) == NULL) 1544 return; 1545 1546 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1547 dlip = (dl_notify_ind_t *)mp->b_rptr; 1548 dlip->dl_primitive = DL_NOTIFY_IND; 1549 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1550 dlip->dl_data = DL_CURR_PHYS_ADDR; 1551 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1552 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1553 1554 bcopy(addr, &dlip[1], addr_length); 1555 1556 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 
0 : dsp->ds_sap; 1557 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype; 1558 1559 qreply(dsp->ds_wq, mp); 1560 } 1561 1562 /* 1563 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1564 */ 1565 static void 1566 str_notify_link_up(dld_str_t *dsp) 1567 { 1568 mblk_t *mp; 1569 dl_notify_ind_t *dlip; 1570 1571 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1572 return; 1573 1574 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1575 M_PROTO, 0)) == NULL) 1576 return; 1577 1578 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1579 dlip = (dl_notify_ind_t *)mp->b_rptr; 1580 dlip->dl_primitive = DL_NOTIFY_IND; 1581 dlip->dl_notification = DL_NOTE_LINK_UP; 1582 1583 qreply(dsp->ds_wq, mp); 1584 } 1585 1586 /* 1587 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1588 */ 1589 static void 1590 str_notify_link_down(dld_str_t *dsp) 1591 { 1592 mblk_t *mp; 1593 dl_notify_ind_t *dlip; 1594 1595 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1596 return; 1597 1598 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1599 M_PROTO, 0)) == NULL) 1600 return; 1601 1602 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1603 dlip = (dl_notify_ind_t *)mp->b_rptr; 1604 dlip->dl_primitive = DL_NOTIFY_IND; 1605 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1606 1607 qreply(dsp->ds_wq, mp); 1608 } 1609 1610 /* 1611 * DL_NOTIFY_IND: DL_NOTE_SPEED 1612 */ 1613 static void 1614 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1615 { 1616 mblk_t *mp; 1617 dl_notify_ind_t *dlip; 1618 1619 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1620 return; 1621 1622 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1623 M_PROTO, 0)) == NULL) 1624 return; 1625 1626 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1627 dlip = (dl_notify_ind_t *)mp->b_rptr; 1628 dlip->dl_primitive = DL_NOTIFY_IND; 1629 dlip->dl_notification = DL_NOTE_SPEED; 1630 dlip->dl_data = speed; 1631 1632 qreply(dsp->ds_wq, mp); 1633 } 1634 1635 /* 1636 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1637 */ 1638 static void 1639 
str_notify_capab_reneg(dld_str_t *dsp) 1640 { 1641 mblk_t *mp; 1642 dl_notify_ind_t *dlip; 1643 1644 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1645 return; 1646 1647 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1648 M_PROTO, 0)) == NULL) 1649 return; 1650 1651 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1652 dlip = (dl_notify_ind_t *)mp->b_rptr; 1653 dlip->dl_primitive = DL_NOTIFY_IND; 1654 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1655 1656 qreply(dsp->ds_wq, mp); 1657 } 1658 1659 /* 1660 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH 1661 */ 1662 static void 1663 str_notify_fastpath_flush(dld_str_t *dsp) 1664 { 1665 mblk_t *mp; 1666 dl_notify_ind_t *dlip; 1667 1668 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH)) 1669 return; 1670 1671 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1672 M_PROTO, 0)) == NULL) 1673 return; 1674 1675 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1676 dlip = (dl_notify_ind_t *)mp->b_rptr; 1677 dlip->dl_primitive = DL_NOTIFY_IND; 1678 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH; 1679 1680 qreply(dsp->ds_wq, mp); 1681 } 1682 1683 /* 1684 * MAC notification callback. 1685 */ 1686 static void 1687 str_notify(void *arg, mac_notify_type_t type) 1688 { 1689 dld_str_t *dsp = (dld_str_t *)arg; 1690 queue_t *q = dsp->ds_wq; 1691 1692 switch (type) { 1693 case MAC_NOTE_TX: 1694 qenable(q); 1695 break; 1696 1697 case MAC_NOTE_DEVPROMISC: 1698 /* 1699 * Send the appropriate DL_NOTIFY_IND. 1700 */ 1701 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1702 str_notify_promisc_on_phys(dsp); 1703 else 1704 str_notify_promisc_off_phys(dsp); 1705 break; 1706 1707 case MAC_NOTE_PROMISC: 1708 break; 1709 1710 case MAC_NOTE_UNICST: 1711 /* 1712 * This notification is sent whenever the MAC unicast address 1713 * changes. We need to re-cache the address. 1714 */ 1715 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1716 1717 /* 1718 * Send the appropriate DL_NOTIFY_IND. 
1719 */ 1720 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1721 break; 1722 1723 case MAC_NOTE_LINK: 1724 /* 1725 * This notification is sent every time the MAC driver 1726 * updates the link state. 1727 */ 1728 switch (mac_link_get(dsp->ds_mh)) { 1729 case LINK_STATE_UP: { 1730 uint64_t speed; 1731 /* 1732 * The link is up so send the appropriate 1733 * DL_NOTIFY_IND. 1734 */ 1735 str_notify_link_up(dsp); 1736 1737 speed = mac_stat_get(dsp->ds_mh, MAC_STAT_IFSPEED); 1738 str_notify_speed(dsp, (uint32_t)(speed / 1000ull)); 1739 break; 1740 } 1741 case LINK_STATE_DOWN: 1742 /* 1743 * The link is down so send the appropriate 1744 * DL_NOTIFY_IND. 1745 */ 1746 str_notify_link_down(dsp); 1747 break; 1748 1749 default: 1750 break; 1751 } 1752 break; 1753 1754 case MAC_NOTE_RESOURCE: 1755 case MAC_NOTE_VNIC: 1756 /* 1757 * This notification is sent whenever the MAC resources 1758 * change or capabilities change. We need to renegotiate 1759 * the capabilities. Send the appropriate DL_NOTIFY_IND. 1760 */ 1761 str_notify_capab_reneg(dsp); 1762 break; 1763 1764 case MAC_NOTE_FASTPATH_FLUSH: 1765 str_notify_fastpath_flush(dsp); 1766 break; 1767 1768 default: 1769 ASSERT(B_FALSE); 1770 break; 1771 } 1772 } 1773 1774 /* 1775 * Enqueue one or more messages to the transmit queue. 1776 * Caller specifies the insertion position (head/tail). 
1777 */ 1778 void 1779 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) 1780 { 1781 mblk_t *tail; 1782 queue_t *q = dsp->ds_wq; 1783 uint_t cnt, msgcnt; 1784 uint_t tot_cnt, tot_msgcnt; 1785 1786 ASSERT(DB_TYPE(mp) == M_DATA); 1787 /* Calculate total size and count of the packet(s) */ 1788 for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; 1789 tail->b_next != NULL; tail = tail->b_next) { 1790 ASSERT(DB_TYPE(tail->b_next) == M_DATA); 1791 cnt += msgdsize(tail->b_next); 1792 msgcnt++; 1793 } 1794 1795 mutex_enter(&dsp->ds_tx_list_lock); 1796 /* 1797 * If the queue depth would exceed the allowed threshold, drop 1798 * new packet(s) and drain those already in the queue. 1799 */ 1800 tot_cnt = dsp->ds_tx_cnt + cnt; 1801 tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; 1802 1803 if (!head_insert && 1804 (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) { 1805 ASSERT(dsp->ds_tx_qbusy); 1806 mutex_exit(&dsp->ds_tx_list_lock); 1807 freemsgchain(mp); 1808 goto done; 1809 } 1810 1811 /* Update the queue size parameters */ 1812 dsp->ds_tx_cnt = tot_cnt; 1813 dsp->ds_tx_msgcnt = tot_msgcnt; 1814 1815 /* 1816 * If the transmit queue is currently empty and we are 1817 * about to deposit the packet(s) there, switch mode to 1818 * "busy" and raise flow-control condition. 
1819 */ 1820 if (!dsp->ds_tx_qbusy) { 1821 dsp->ds_tx_qbusy = B_TRUE; 1822 ASSERT(dsp->ds_tx_flow_mp != NULL); 1823 (void) putq(q, dsp->ds_tx_flow_mp); 1824 dsp->ds_tx_flow_mp = NULL; 1825 } 1826 1827 if (!head_insert) { 1828 /* Tail insertion */ 1829 if (dsp->ds_tx_list_head == NULL) 1830 dsp->ds_tx_list_head = mp; 1831 else 1832 dsp->ds_tx_list_tail->b_next = mp; 1833 dsp->ds_tx_list_tail = tail; 1834 } else { 1835 /* Head insertion */ 1836 tail->b_next = dsp->ds_tx_list_head; 1837 if (dsp->ds_tx_list_head == NULL) 1838 dsp->ds_tx_list_tail = tail; 1839 dsp->ds_tx_list_head = mp; 1840 } 1841 mutex_exit(&dsp->ds_tx_list_lock); 1842 done: 1843 /* Schedule service thread to drain the transmit queue */ 1844 if (!head_insert) 1845 qenable(q); 1846 } 1847 1848 void 1849 dld_tx_flush(dld_str_t *dsp) 1850 { 1851 mutex_enter(&dsp->ds_tx_list_lock); 1852 if (dsp->ds_tx_list_head != NULL) { 1853 freemsgchain(dsp->ds_tx_list_head); 1854 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 1855 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 1856 if (dsp->ds_tx_qbusy) { 1857 dsp->ds_tx_flow_mp = getq(dsp->ds_wq); 1858 ASSERT(dsp->ds_tx_flow_mp != NULL); 1859 dsp->ds_tx_qbusy = B_FALSE; 1860 } 1861 } 1862 mutex_exit(&dsp->ds_tx_list_lock); 1863 } 1864 1865 /* 1866 * Process an M_IOCTL message. 
1867 */ 1868 static void 1869 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1870 { 1871 uint_t cmd; 1872 1873 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1874 ASSERT(dsp->ds_type == DLD_DLPI); 1875 1876 switch (cmd) { 1877 case DLIOCNATIVE: 1878 ioc_native(dsp, mp); 1879 break; 1880 case DLIOCRAW: 1881 ioc_raw(dsp, mp); 1882 break; 1883 case DLIOCHDRINFO: 1884 ioc_fast(dsp, mp); 1885 break; 1886 default: 1887 ioc(dsp, mp); 1888 } 1889 } 1890 1891 /* 1892 * DLIOCNATIVE 1893 */ 1894 static void 1895 ioc_native(dld_str_t *dsp, mblk_t *mp) 1896 { 1897 queue_t *q = dsp->ds_wq; 1898 const mac_info_t *mip = dsp->ds_mip; 1899 1900 rw_enter(&dsp->ds_lock, RW_WRITER); 1901 1902 /* 1903 * Native mode can be enabled if it's disabled and if the 1904 * native media type is different. 1905 */ 1906 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia) 1907 dsp->ds_native = B_TRUE; 1908 1909 rw_exit(&dsp->ds_lock); 1910 1911 if (dsp->ds_native) 1912 miocack(q, mp, 0, mip->mi_nativemedia); 1913 else 1914 miocnak(q, mp, 0, ENOTSUP); 1915 } 1916 1917 /* 1918 * DLIOCRAW 1919 */ 1920 static void 1921 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1922 { 1923 queue_t *q = dsp->ds_wq; 1924 1925 rw_enter(&dsp->ds_lock, RW_WRITER); 1926 if (dsp->ds_polling || dsp->ds_soft_ring) { 1927 rw_exit(&dsp->ds_lock); 1928 miocnak(q, mp, 0, EPROTO); 1929 return; 1930 } 1931 1932 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 1933 /* 1934 * Set the receive callback. 1935 */ 1936 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); 1937 } 1938 1939 /* 1940 * Note that raw mode is enabled. 
1941 */ 1942 dsp->ds_mode = DLD_RAW; 1943 1944 rw_exit(&dsp->ds_lock); 1945 miocack(q, mp, 0, 0); 1946 } 1947 1948 /* 1949 * DLIOCHDRINFO 1950 */ 1951 static void 1952 ioc_fast(dld_str_t *dsp, mblk_t *mp) 1953 { 1954 dl_unitdata_req_t *dlp; 1955 off_t off; 1956 size_t len; 1957 const uint8_t *addr; 1958 uint16_t sap; 1959 mblk_t *nmp; 1960 mblk_t *hmp; 1961 uint_t addr_length; 1962 queue_t *q = dsp->ds_wq; 1963 int err; 1964 dls_channel_t dc; 1965 1966 if (dld_opt & DLD_OPT_NO_FASTPATH) { 1967 err = ENOTSUP; 1968 goto failed; 1969 } 1970 1971 /* 1972 * DLIOCHDRINFO should only come from IP. The one initiated from 1973 * user-land should not be allowed. 1974 */ 1975 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) { 1976 err = EINVAL; 1977 goto failed; 1978 } 1979 1980 nmp = mp->b_cont; 1981 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 1982 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 1983 dlp->dl_primitive != DL_UNITDATA_REQ)) { 1984 err = EINVAL; 1985 goto failed; 1986 } 1987 1988 off = dlp->dl_dest_addr_offset; 1989 len = dlp->dl_dest_addr_length; 1990 1991 if (!MBLKIN(nmp, off, len)) { 1992 err = EINVAL; 1993 goto failed; 1994 } 1995 1996 rw_enter(&dsp->ds_lock, RW_READER); 1997 if (dsp->ds_dlstate != DL_IDLE) { 1998 rw_exit(&dsp->ds_lock); 1999 err = ENOTSUP; 2000 goto failed; 2001 } 2002 2003 addr_length = dsp->ds_mip->mi_addr_length; 2004 if (len != addr_length + sizeof (uint16_t)) { 2005 rw_exit(&dsp->ds_lock); 2006 err = EINVAL; 2007 goto failed; 2008 } 2009 2010 addr = nmp->b_rptr + off; 2011 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 2012 dc = dsp->ds_dc; 2013 2014 if ((hmp = dls_header(dc, addr, sap, 0, NULL)) == NULL) { 2015 rw_exit(&dsp->ds_lock); 2016 err = ENOMEM; 2017 goto failed; 2018 } 2019 2020 /* 2021 * This is a performance optimization. We originally entered 2022 * as reader and only become writer upon transitioning into 2023 * the DLD_FASTPATH mode for the first time. 
Otherwise we 2024 * stay as reader and return the fast-path header to IP. 2025 */ 2026 if (dsp->ds_mode != DLD_FASTPATH) { 2027 if (!rw_tryupgrade(&dsp->ds_lock)) { 2028 rw_exit(&dsp->ds_lock); 2029 rw_enter(&dsp->ds_lock, RW_WRITER); 2030 2031 /* 2032 * State may have changed before we re-acquired 2033 * the writer lock in case the upgrade failed. 2034 */ 2035 if (dsp->ds_dlstate != DL_IDLE) { 2036 rw_exit(&dsp->ds_lock); 2037 err = ENOTSUP; 2038 goto failed; 2039 } 2040 } 2041 2042 /* 2043 * Set the receive callback (unless polling is enabled). 2044 */ 2045 if (!dsp->ds_polling && !dsp->ds_soft_ring) 2046 dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); 2047 2048 /* 2049 * Note that fast-path mode is enabled. 2050 */ 2051 dsp->ds_mode = DLD_FASTPATH; 2052 } 2053 rw_exit(&dsp->ds_lock); 2054 2055 freemsg(nmp->b_cont); 2056 nmp->b_cont = hmp; 2057 2058 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 2059 return; 2060 failed: 2061 miocnak(q, mp, 0, err); 2062 } 2063 2064 /* 2065 * Catch-all handler. 2066 */ 2067 static void 2068 ioc(dld_str_t *dsp, mblk_t *mp) 2069 { 2070 queue_t *q = dsp->ds_wq; 2071 mac_handle_t mh; 2072 2073 rw_enter(&dsp->ds_lock, RW_READER); 2074 if (dsp->ds_dlstate == DL_UNATTACHED) { 2075 rw_exit(&dsp->ds_lock); 2076 miocnak(q, mp, 0, EINVAL); 2077 return; 2078 } 2079 mh = dsp->ds_mh; 2080 ASSERT(mh != NULL); 2081 rw_exit(&dsp->ds_lock); 2082 mac_ioctl(mh, q, mp); 2083 } 2084