1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Data-Link Driver 30 */ 31 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/strsubr.h> 35 #include <sys/atomic.h> 36 #include <sys/mkdev.h> 37 #include <sys/vlan.h> 38 #include <sys/dld.h> 39 #include <sys/dld_impl.h> 40 #include <sys/dls_impl.h> 41 #include <inet/common.h> 42 43 static int str_constructor(void *, void *, int); 44 static void str_destructor(void *, void *); 45 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); 46 static void str_notify_promisc_on_phys(dld_str_t *); 47 static void str_notify_promisc_off_phys(dld_str_t *); 48 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 49 static void str_notify_link_up(dld_str_t *); 50 static void str_notify_link_down(dld_str_t *); 51 static void str_notify_capab_reneg(dld_str_t *); 52 static void str_notify_speed(dld_str_t *, uint32_t); 53 static void str_notify(void *, mac_notify_type_t); 54 55 static void ioc_native(dld_str_t *, mblk_t *); 56 static void 
ioc_raw(dld_str_t *, mblk_t *); 57 static void ioc_fast(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static void str_mdata_raw_put(dld_str_t *, mblk_t *); 61 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t); 62 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *); 63 64 static uint32_t str_count; 65 static kmem_cache_t *str_cachep; 66 static uint32_t minor_count; 67 static mod_hash_t *str_hashp; 68 69 #define STR_HASHSZ 64 70 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 71 72 /* 73 * Some notes on entry points, flow-control, queueing and locking: 74 * 75 * This driver exports the traditional STREAMS put entry point as well as 76 * the non-STREAMS fast-path transmit routine which is provided to IP via 77 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 78 * and data operations, while the fast-path routine deals only with M_DATA 79 * fast-path packets. Regardless of the entry point, all outbound packets 80 * will end up in dld_tx_single(), where they will be delivered to the MAC 81 * driver. 82 * 83 * The transmit logic operates in two modes: a "not busy" mode where the 84 * packets will be delivered to the MAC for a send attempt, or "busy" mode 85 * where they will be enqueued in the internal queue because of flow-control. 86 * Flow-control happens when the MAC driver indicates the packets couldn't 87 * be transmitted due to lack of resources (e.g. running out of descriptors). 88 * In such case, the driver will place a dummy message on its write-side 89 * STREAMS queue so that the queue is marked as "full". Any subsequent 90 * packets arriving at the driver will be enqueued in the internal queue, 91 * which is drained in the context of the service thread that gets scheduled 92 * whenever the driver is in the "busy" mode. 
When all packets have been
 * successfully delivered by MAC and the internal queue is empty, it will
 * transition to the "not busy" mode by removing the dummy message from the
 * write-side STREAMS queue; in effect this will trigger backenabling.
 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due
 * to the above reasons.
 *
 * The driver implements an internal transmit queue independent of STREAMS.
 * This allows for flexibility and provides a fast enqueue/dequeue mechanism
 * compared to the putq() and getq() STREAMS interfaces. The only putq() and
 * getq() operations done by the driver are those related to placing and
 * removing the dummy message to/from the write-side STREAMS queue for flow-
 * control purposes.
 *
 * Locking is done independent of STREAMS due to the driver being fully MT.
 * Threads entering the driver (either from put or service entry points)
 * will most likely be readers, with the exception of a few writer cases
 * such as those handling DLPI attach/detach/bind/unbind/etc. or any of the
 * DLD-related ioctl requests. The DLPI detach case is special, because
 * it involves freeing resources and therefore must be single-threaded.
 * Unfortunately the readers/writers lock can't be used to protect against
 * it, because the lock is dropped prior to the driver calling places where
 * putnext() may be invoked, and such places may depend on those resources
 * to exist. Because of this, the driver always completes the DLPI detach
 * process when there are no other threads running in the driver. This is
 * done by keeping track of the number of threads, such that the last
 * thread leaving the driver will finish the pending DLPI detach operation.
119 */ 120 121 /* 122 * dld_max_q_count is the queue depth threshold used to limit the number of 123 * outstanding packets or bytes allowed in the queue; once this limit is 124 * reached the driver will free any incoming ones until the queue depth 125 * drops below the threshold. 126 * 127 * This buffering is provided to accomodate clients which do not employ 128 * their own buffering scheme, and to handle occasional packet bursts. 129 * Clients which handle their own buffering will receive positive feedback 130 * from this driver as soon as it transitions into the "busy" state, i.e. 131 * when the queue is initially filled up; they will get backenabled once 132 * the queue is empty. 133 * 134 * The value chosen here is rather arbitrary; in future some intelligent 135 * heuristics may be involved which could take into account the hardware's 136 * transmit ring size, etc. 137 */ 138 uint_t dld_max_q_count = (16 * 1024 *1024); 139 140 /* 141 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular 142 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that 143 * match dev_t. If a stream is found and it is attached, its dev_info_t * 144 * is returned. 145 */ 146 typedef struct i_dld_str_state_s { 147 major_t ds_major; 148 minor_t ds_minor; 149 dev_info_t *ds_dip; 150 } i_dld_str_state_t; 151 152 /* ARGSUSED */ 153 static uint_t 154 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 155 { 156 i_dld_str_state_t *statep = arg; 157 dld_str_t *dsp = (dld_str_t *)val; 158 159 if (statep->ds_major != dsp->ds_major) 160 return (MH_WALK_CONTINUE); 161 162 ASSERT(statep->ds_minor != 0); 163 164 /* 165 * Access to ds_ppa and ds_mh need to be protected by ds_lock. 166 */ 167 rw_enter(&dsp->ds_lock, RW_READER); 168 if (statep->ds_minor <= DLD_MAX_MINOR) { 169 /* 170 * Style 1: minor can be derived from the ppa. we 171 * continue to walk until we find a matching stream 172 * in attached state. 
173 */ 174 if (statep->ds_minor == DLS_PPA2MINOR(dsp->ds_ppa) && 175 dsp->ds_mh != NULL) { 176 statep->ds_dip = mac_devinfo_get(dsp->ds_mh); 177 rw_exit(&dsp->ds_lock); 178 return (MH_WALK_TERMINATE); 179 } 180 } else { 181 /* 182 * Clone: a clone minor is unique. we can terminate the 183 * walk if we find a matching stream -- even if we fail 184 * to obtain the devinfo. 185 */ 186 if (statep->ds_minor == dsp->ds_minor) { 187 if (dsp->ds_mh != NULL) 188 statep->ds_dip = mac_devinfo_get(dsp->ds_mh); 189 rw_exit(&dsp->ds_lock); 190 return (MH_WALK_TERMINATE); 191 } 192 } 193 rw_exit(&dsp->ds_lock); 194 return (MH_WALK_CONTINUE); 195 } 196 197 static dev_info_t * 198 dld_finddevinfo(dev_t dev) 199 { 200 i_dld_str_state_t state; 201 202 state.ds_minor = getminor(dev); 203 state.ds_major = getmajor(dev); 204 state.ds_dip = NULL; 205 206 if (state.ds_minor == 0) 207 return (NULL); 208 209 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 210 if (state.ds_dip != NULL || state.ds_minor <= DLD_MAX_MINOR) 211 return (state.ds_dip); 212 213 /* See if it's a minor node of a VLAN */ 214 return (dls_finddevinfo(dev)); 215 } 216 217 /* 218 * devo_getinfo: getinfo(9e) 219 */ 220 /*ARGSUSED*/ 221 int 222 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 223 { 224 dev_info_t *devinfo; 225 minor_t minor = getminor((dev_t)arg); 226 int rc = DDI_FAILURE; 227 228 switch (cmd) { 229 case DDI_INFO_DEVT2DEVINFO: 230 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 231 *(dev_info_t **)resp = devinfo; 232 rc = DDI_SUCCESS; 233 } 234 break; 235 case DDI_INFO_DEVT2INSTANCE: 236 if (minor > 0 && minor <= DLD_MAX_MINOR) { 237 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor); 238 rc = DDI_SUCCESS; 239 } else if (minor > DLD_MAX_MINOR && 240 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 241 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); 242 rc = DDI_SUCCESS; 243 } 244 break; 245 } 246 return (rc); 247 } 248 249 /* 250 * qi_qopen: open(9e) 251 */ 252 
/*ARGSUSED*/ 253 int 254 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 255 { 256 dld_str_t *dsp; 257 major_t major; 258 minor_t minor; 259 int err; 260 261 if (sflag == MODOPEN) 262 return (ENOTSUP); 263 264 /* 265 * This is a cloning driver and therefore each queue should only 266 * ever get opened once. 267 */ 268 if (rq->q_ptr != NULL) 269 return (EBUSY); 270 271 major = getmajor(*devp); 272 minor = getminor(*devp); 273 274 /* 275 * Create a new dld_str_t for the stream. This will grab a new minor 276 * number that will be handed back in the cloned dev_t. Creation may 277 * fail if we can't allocate the dummy mblk used for flow-control. 278 */ 279 dsp = dld_str_create(rq, DLD_DLPI, major, 280 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 281 if (dsp == NULL) 282 return (ENOSR); 283 284 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 285 if (minor != 0) { 286 /* 287 * Style 1 open 288 */ 289 t_uscalar_t ppa; 290 291 if ((err = dls_ppa_from_minor(minor, &ppa)) != 0) 292 goto failed; 293 294 if ((err = dld_str_attach(dsp, ppa)) != 0) 295 goto failed; 296 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 297 } else { 298 (void) qassociate(rq, -1); 299 } 300 301 /* 302 * Enable the queue srv(9e) routine. 303 */ 304 qprocson(rq); 305 306 /* 307 * Construct a cloned dev_t to hand back. 308 */ 309 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 310 return (0); 311 312 failed: 313 dld_str_destroy(dsp); 314 return (err); 315 } 316 317 /* 318 * qi_qclose: close(9e) 319 */ 320 int 321 dld_close(queue_t *rq) 322 { 323 dld_str_t *dsp = rq->q_ptr; 324 325 /* 326 * Wait until pending requests are processed. 327 */ 328 mutex_enter(&dsp->ds_thr_lock); 329 while (dsp->ds_pending_cnt > 0) 330 cv_wait(&dsp->ds_pending_cv, &dsp->ds_thr_lock); 331 mutex_exit(&dsp->ds_thr_lock); 332 333 /* 334 * Disable the queue srv(9e) routine. 
335 */ 336 qprocsoff(rq); 337 338 /* 339 * At this point we can not be entered by any threads via STREAMS 340 * or the direct call interface, which is available only to IP. 341 * After the interface is unplumbed, IP wouldn't have any reference 342 * to this instance, and therefore we are now effectively single 343 * threaded and don't require any lock protection. Flush all 344 * pending packets which are sitting in the transmit queue. 345 */ 346 ASSERT(dsp->ds_thr == 0); 347 dld_tx_flush(dsp); 348 349 /* 350 * This stream was open to a provider node. Check to see 351 * if it has been cleanly shut down. 352 */ 353 if (dsp->ds_dlstate != DL_UNATTACHED) { 354 /* 355 * The stream is either open to a style 1 provider or 356 * this is not clean shutdown. Detach from the PPA. 357 * (This is still ok even in the style 1 case). 358 */ 359 dld_str_detach(dsp); 360 } 361 362 dld_str_destroy(dsp); 363 return (0); 364 } 365 366 /* 367 * qi_qputp: put(9e) 368 */ 369 void 370 dld_wput(queue_t *wq, mblk_t *mp) 371 { 372 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 373 374 DLD_ENTER(dsp); 375 376 switch (DB_TYPE(mp)) { 377 case M_DATA: 378 /* 379 * State is held constant by the DLD_ENTER done above 380 * until all sending threads are done. Mode can change 381 * due to ioctl, however locks must not be held across 382 * calls to putnext(), which can be called from here 383 * via dld_tx_single(). 
384 */ 385 rw_enter(&dsp->ds_lock, RW_READER); 386 if (dsp->ds_dlstate != DL_IDLE || 387 dsp->ds_mode == DLD_UNITDATA) { 388 rw_exit(&dsp->ds_lock); 389 freemsg(mp); 390 } else if (dsp->ds_mode == DLD_FASTPATH) { 391 rw_exit(&dsp->ds_lock); 392 str_mdata_fastpath_put(dsp, mp); 393 } else if (dsp->ds_mode == DLD_RAW) { 394 rw_exit(&dsp->ds_lock); 395 str_mdata_raw_put(dsp, mp); 396 } 397 break; 398 case M_PROTO: 399 case M_PCPROTO: 400 dld_proto(dsp, mp); 401 break; 402 case M_IOCTL: 403 dld_ioc(dsp, mp); 404 break; 405 case M_FLUSH: 406 if (*mp->b_rptr & FLUSHW) { 407 dld_tx_flush(dsp); 408 *mp->b_rptr &= ~FLUSHW; 409 } 410 411 if (*mp->b_rptr & FLUSHR) { 412 qreply(wq, mp); 413 } else { 414 freemsg(mp); 415 } 416 break; 417 default: 418 freemsg(mp); 419 break; 420 } 421 422 DLD_EXIT(dsp); 423 } 424 425 /* 426 * qi_srvp: srv(9e) 427 */ 428 void 429 dld_wsrv(queue_t *wq) 430 { 431 mblk_t *mp; 432 dld_str_t *dsp = wq->q_ptr; 433 434 DLD_ENTER(dsp); 435 rw_enter(&dsp->ds_lock, RW_READER); 436 /* 437 * Grab all packets (chained via b_next) off our transmit queue 438 * and try to send them all to the MAC layer. Since the queue 439 * is independent of streams, we are able to dequeue all messages 440 * at once without looping through getq() and manually chaining 441 * them. Note that the queue size parameters (byte and message 442 * counts) are cleared as well, but we postpone the backenabling 443 * until after the MAC transmit since some packets may end up 444 * back at our transmit queue. 
445 */ 446 mutex_enter(&dsp->ds_tx_list_lock); 447 if ((mp = dsp->ds_tx_list_head) == NULL) { 448 ASSERT(!dsp->ds_tx_qbusy); 449 ASSERT(dsp->ds_tx_flow_mp != NULL); 450 ASSERT(dsp->ds_tx_list_head == NULL); 451 ASSERT(dsp->ds_tx_list_tail == NULL); 452 ASSERT(dsp->ds_tx_cnt == 0); 453 ASSERT(dsp->ds_tx_msgcnt == 0); 454 mutex_exit(&dsp->ds_tx_list_lock); 455 rw_exit(&dsp->ds_lock); 456 DLD_EXIT(dsp); 457 return; 458 } 459 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 460 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 461 mutex_exit(&dsp->ds_tx_list_lock); 462 463 /* 464 * Discard packets unless we are attached and bound; note that 465 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 466 * because regardless of the mode all transmit will end up in 467 * dld_tx_single() where the packets may be queued. 468 */ 469 ASSERT(DB_TYPE(mp) == M_DATA); 470 if (dsp->ds_dlstate != DL_IDLE) { 471 freemsgchain(mp); 472 goto done; 473 } 474 475 /* 476 * Attempt to transmit one or more packets. If the MAC can't 477 * send them all, re-queue the packet(s) at the beginning of 478 * the transmit queue to avoid any re-ordering. 479 */ 480 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 481 dld_tx_enqueue(dsp, mp, B_TRUE); 482 483 done: 484 /* 485 * Grab the list lock again and check if the transmit queue is 486 * really empty; if so, lift up flow-control and backenable any 487 * writer queues. If the queue is not empty, schedule service 488 * thread to drain it. 
489 */ 490 mutex_enter(&dsp->ds_tx_list_lock); 491 if (dsp->ds_tx_list_head == NULL) { 492 dsp->ds_tx_flow_mp = getq(wq); 493 ASSERT(dsp->ds_tx_flow_mp != NULL); 494 dsp->ds_tx_qbusy = B_FALSE; 495 } 496 mutex_exit(&dsp->ds_tx_list_lock); 497 498 rw_exit(&dsp->ds_lock); 499 DLD_EXIT(dsp); 500 } 501 502 void 503 dld_init_ops(struct dev_ops *ops, const char *name) 504 { 505 struct streamtab *stream; 506 struct qinit *rq, *wq; 507 struct module_info *modinfo; 508 509 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 510 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 511 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 512 modinfo->mi_minpsz = 0; 513 modinfo->mi_maxpsz = 64*1024; 514 modinfo->mi_hiwat = 1; 515 modinfo->mi_lowat = 0; 516 517 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 518 rq->qi_qopen = dld_open; 519 rq->qi_qclose = dld_close; 520 rq->qi_minfo = modinfo; 521 522 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 523 wq->qi_putp = (pfi_t)dld_wput; 524 wq->qi_srvp = (pfi_t)dld_wsrv; 525 wq->qi_minfo = modinfo; 526 527 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 528 stream->st_rdinit = rq; 529 stream->st_wrinit = wq; 530 ops->devo_cb_ops->cb_str = stream; 531 532 ops->devo_getinfo = &dld_getinfo; 533 } 534 535 void 536 dld_fini_ops(struct dev_ops *ops) 537 { 538 struct streamtab *stream; 539 struct qinit *rq, *wq; 540 struct module_info *modinfo; 541 542 stream = ops->devo_cb_ops->cb_str; 543 rq = stream->st_rdinit; 544 wq = stream->st_wrinit; 545 modinfo = rq->qi_minfo; 546 ASSERT(wq->qi_minfo == modinfo); 547 548 kmem_free(stream, sizeof (struct streamtab)); 549 kmem_free(wq, sizeof (struct qinit)); 550 kmem_free(rq, sizeof (struct qinit)); 551 kmem_free(modinfo->mi_idname, FMNAMESZ); 552 kmem_free(modinfo, sizeof (struct module_info)); 553 } 554 555 /* 556 * Initialize this module's data structures. 557 */ 558 void 559 dld_str_init(void) 560 { 561 /* 562 * Create dld_str_t object cache. 
563 */ 564 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 565 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 566 ASSERT(str_cachep != NULL); 567 568 /* 569 * Create a hash table for maintaining dld_str_t's. 570 * The ds_minor field (the clone minor number) of a dld_str_t 571 * is used as a key for this hash table because this number is 572 * globally unique (allocated from "dls_minor_arena"). 573 */ 574 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 575 mod_hash_null_valdtor); 576 } 577 578 /* 579 * Tear down this module's data structures. 580 */ 581 int 582 dld_str_fini(void) 583 { 584 /* 585 * Make sure that there are no objects in use. 586 */ 587 if (str_count != 0) 588 return (EBUSY); 589 590 /* 591 * Check to see if there are any minor numbers still in use. 592 */ 593 if (minor_count != 0) 594 return (EBUSY); 595 596 /* 597 * Destroy object cache. 598 */ 599 kmem_cache_destroy(str_cachep); 600 mod_hash_destroy_idhash(str_hashp); 601 return (0); 602 } 603 604 /* 605 * Create a new dld_str_t object. 606 */ 607 dld_str_t * 608 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 609 { 610 dld_str_t *dsp; 611 int err; 612 613 /* 614 * Allocate an object from the cache. 615 */ 616 atomic_add_32(&str_count, 1); 617 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 618 619 /* 620 * Allocate the dummy mblk for flow-control. 621 */ 622 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 623 if (dsp->ds_tx_flow_mp == NULL) { 624 kmem_cache_free(str_cachep, dsp); 625 atomic_add_32(&str_count, -1); 626 return (NULL); 627 } 628 dsp->ds_type = type; 629 dsp->ds_major = major; 630 dsp->ds_style = style; 631 632 /* 633 * Initialize the queue pointers. 634 */ 635 ASSERT(RD(rq) == rq); 636 dsp->ds_rq = rq; 637 dsp->ds_wq = WR(rq); 638 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 639 640 /* 641 * We want explicit control over our write-side STREAMS queue 642 * where the dummy mblk gets added/removed for flow-control. 
643 */ 644 noenable(WR(rq)); 645 646 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor), 647 (mod_hash_val_t)dsp); 648 ASSERT(err == 0); 649 return (dsp); 650 } 651 652 /* 653 * Destroy a dld_str_t object. 654 */ 655 void 656 dld_str_destroy(dld_str_t *dsp) 657 { 658 queue_t *rq; 659 queue_t *wq; 660 mod_hash_val_t val; 661 /* 662 * Clear the queue pointers. 663 */ 664 rq = dsp->ds_rq; 665 wq = dsp->ds_wq; 666 ASSERT(wq == WR(rq)); 667 668 rq->q_ptr = wq->q_ptr = NULL; 669 dsp->ds_rq = dsp->ds_wq = NULL; 670 671 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 672 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 673 ASSERT(dsp->ds_tx_list_head == NULL); 674 ASSERT(dsp->ds_tx_list_tail == NULL); 675 ASSERT(dsp->ds_tx_cnt == 0); 676 ASSERT(dsp->ds_tx_msgcnt == 0); 677 ASSERT(!dsp->ds_tx_qbusy); 678 679 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 680 ASSERT(dsp->ds_thr == 0); 681 ASSERT(dsp->ds_pending_req == NULL); 682 683 /* 684 * Reinitialize all the flags. 685 */ 686 dsp->ds_notifications = 0; 687 dsp->ds_passivestate = DLD_UNINITIALIZED; 688 dsp->ds_mode = DLD_UNITDATA; 689 dsp->ds_native = B_FALSE; 690 691 /* 692 * Free the dummy mblk if exists. 693 */ 694 if (dsp->ds_tx_flow_mp != NULL) { 695 freeb(dsp->ds_tx_flow_mp); 696 dsp->ds_tx_flow_mp = NULL; 697 } 698 699 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val); 700 ASSERT(dsp == (dld_str_t *)val); 701 702 /* 703 * Free the object back to the cache. 704 */ 705 kmem_cache_free(str_cachep, dsp); 706 atomic_add_32(&str_count, -1); 707 } 708 709 /* 710 * kmem_cache contructor function: see kmem_cache_create(9f). 711 */ 712 /*ARGSUSED*/ 713 static int 714 str_constructor(void *buf, void *cdrarg, int kmflags) 715 { 716 dld_str_t *dsp = buf; 717 718 bzero(buf, sizeof (dld_str_t)); 719 720 /* 721 * Allocate a new minor number. 
722 */ 723 atomic_add_32(&minor_count, 1); 724 if ((dsp->ds_minor = dls_minor_hold(kmflags == KM_SLEEP)) == 0) { 725 atomic_add_32(&minor_count, -1); 726 return (-1); 727 } 728 729 /* 730 * Initialize the DLPI state machine. 731 */ 732 dsp->ds_dlstate = DL_UNATTACHED; 733 dsp->ds_ppa = (t_uscalar_t)-1; 734 735 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 736 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 737 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 738 cv_init(&dsp->ds_pending_cv, NULL, CV_DRIVER, NULL); 739 740 return (0); 741 } 742 743 /* 744 * kmem_cache destructor function. 745 */ 746 /*ARGSUSED*/ 747 static void 748 str_destructor(void *buf, void *cdrarg) 749 { 750 dld_str_t *dsp = buf; 751 752 /* 753 * Make sure the DLPI state machine was reset. 754 */ 755 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 756 757 /* 758 * Make sure the data-link interface was closed. 759 */ 760 ASSERT(dsp->ds_mh == NULL); 761 ASSERT(dsp->ds_dc == NULL); 762 763 /* 764 * Make sure enabled notifications are cleared. 765 */ 766 ASSERT(dsp->ds_notifications == 0); 767 768 /* 769 * Make sure polling is disabled. 770 */ 771 ASSERT(!dsp->ds_polling); 772 773 /* 774 * Release the minor number. 775 */ 776 dls_minor_rele(dsp->ds_minor); 777 atomic_add_32(&minor_count, -1); 778 779 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 780 rw_destroy(&dsp->ds_lock); 781 782 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 783 mutex_destroy(&dsp->ds_tx_list_lock); 784 ASSERT(dsp->ds_tx_flow_mp == NULL); 785 786 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 787 mutex_destroy(&dsp->ds_thr_lock); 788 ASSERT(dsp->ds_pending_req == NULL); 789 ASSERT(dsp->ds_pending_op == NULL); 790 ASSERT(dsp->ds_pending_cnt == 0); 791 cv_destroy(&dsp->ds_pending_cv); 792 } 793 794 /* 795 * M_DATA put. Note that mp is a single message, not a chained message. 
796 */ 797 void 798 dld_tx_single(dld_str_t *dsp, mblk_t *mp) 799 { 800 /* 801 * This function can be called from within dld or from an upper 802 * layer protocol (currently only tcp). If we are in the busy 803 * mode enqueue the packet(s) and return. Otherwise hand them 804 * over to the MAC driver for transmission; any remaining one(s) 805 * which didn't get sent will be queued. 806 * 807 * Note here that we don't grab the list lock prior to checking 808 * the busy flag. This is okay, because a missed transition 809 * will not cause any packet reordering for any particular TCP 810 * connection (which is single-threaded). The enqueue routine 811 * will atomically set the busy flag and schedule the service 812 * thread to run; the flag is only cleared by the service thread 813 * when there is no more packet to be transmitted. 814 */ 815 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 816 dld_tx_enqueue(dsp, mp, B_FALSE); 817 } 818 819 /* 820 * Update the priority bits and VID (may need to insert tag if mp points 821 * to an untagged packet. 822 * If vid is VLAN_ID_NONE, use the VID encoded in the packet. 823 */ 824 static mblk_t * 825 i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid) 826 { 827 mblk_t *hmp; 828 struct ether_vlan_header *evhp; 829 struct ether_header *ehp; 830 uint16_t old_tci = 0; 831 size_t len; 832 833 ASSERT(pri != 0 || vid != VLAN_ID_NONE); 834 835 evhp = (struct ether_vlan_header *)mp->b_rptr; 836 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 837 /* 838 * Tagged packet, update the priority bits. 839 */ 840 old_tci = ntohs(evhp->ether_tci); 841 len = sizeof (struct ether_vlan_header); 842 843 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) { 844 /* 845 * In case some drivers only check the db_ref 846 * count of the first mblk, we pullup the 847 * message into a single mblk. 
848 */ 849 hmp = msgpullup(mp, -1); 850 if ((hmp == NULL) || (MBLKL(hmp) < len)) { 851 freemsg(hmp); 852 return (NULL); 853 } else { 854 freemsg(mp); 855 mp = hmp; 856 } 857 } 858 859 evhp = (struct ether_vlan_header *)mp->b_rptr; 860 } else { 861 /* 862 * Untagged packet. Insert the special priority tag. 863 * First allocate a header mblk. 864 */ 865 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 866 if (hmp == NULL) 867 return (NULL); 868 869 evhp = (struct ether_vlan_header *)hmp->b_rptr; 870 ehp = (struct ether_header *)mp->b_rptr; 871 872 /* 873 * Copy the MAC addresses and typelen 874 */ 875 bcopy(ehp, evhp, (ETHERADDRL * 2)); 876 evhp->ether_type = ehp->ether_type; 877 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 878 879 hmp->b_wptr += sizeof (struct ether_vlan_header); 880 mp->b_rptr += sizeof (struct ether_header); 881 882 /* 883 * Free the original message if it's now empty. Link the 884 * rest of messages to the header message. 885 */ 886 if (MBLKL(mp) == 0) { 887 hmp->b_cont = mp->b_cont; 888 freeb(mp); 889 } else { 890 hmp->b_cont = mp; 891 } 892 mp = hmp; 893 } 894 895 if (pri == 0) 896 pri = VLAN_PRI(old_tci); 897 if (vid == VLAN_ID_NONE) 898 vid = VLAN_ID(old_tci); 899 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid)); 900 return (mp); 901 } 902 903 /* 904 * M_DATA put (IP fast-path mode) 905 */ 906 void 907 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 908 { 909 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 910 mblk_t *newmp; 911 uint_t pri; 912 913 if (is_ethernet) { 914 /* 915 * Update the priority bits to the assigned priority. 916 */ 917 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp); 918 919 if (pri != 0) { 920 newmp = i_dld_ether_header_update_tag(mp, pri, 921 VLAN_ID_NONE); 922 if (newmp == NULL) 923 goto discard; 924 mp = newmp; 925 } 926 } 927 928 dld_tx_single(dsp, mp); 929 return; 930 931 discard: 932 /* TODO: bump kstat? 
*/ 933 freemsg(mp); 934 } 935 936 /* 937 * M_DATA put (DLIOCRAW mode) 938 */ 939 static void 940 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 941 { 942 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 943 mblk_t *bp, *newmp; 944 size_t size; 945 mac_header_info_t mhi; 946 uint_t pri, vid; 947 948 /* 949 * Certain MAC type plugins provide an illusion for raw DLPI 950 * consumers. They pretend that the MAC layer is something that 951 * it's not for the benefit of observability tools. For example, 952 * mac_wifi pretends that it's Ethernet for such consumers. 953 * Here, unless native mode is enabled, we call into the MAC layer so 954 * that this illusion can be maintained. The plugin will optionally 955 * transform the MAC header here into something that can be passed 956 * down. The header goes from raw mode to "cooked" mode. 957 */ 958 if (!dsp->ds_native) { 959 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL) 960 goto discard; 961 mp = newmp; 962 } 963 964 size = MBLKL(mp); 965 966 /* 967 * Check the packet is not too big and that any remaining 968 * fragment list is composed entirely of M_DATA messages. (We 969 * know the first fragment was M_DATA otherwise we could not 970 * have got here). 971 */ 972 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 973 if (DB_TYPE(bp) != M_DATA) 974 goto discard; 975 size += MBLKL(bp); 976 } 977 978 if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0) 979 goto discard; 980 981 /* 982 * If LSO is enabled, check the size against lso_max. Otherwise, 983 * compare the packet size with sdu_max. 984 */ 985 if (size > (dsp->ds_lso ? dsp->ds_lso_max : dsp->ds_mip->mi_sdu_max) 986 + mhi.mhi_hdrsize) 987 goto discard; 988 989 if (is_ethernet) { 990 /* 991 * Discard the packet if this is a VLAN stream but the VID in 992 * the packet is not correct. 
993 */ 994 vid = VLAN_ID(mhi.mhi_tci); 995 if ((dsp->ds_vid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE)) 996 goto discard; 997 998 /* 999 * Discard the packet if this packet is a tagged packet 1000 * but both pri and VID are 0. 1001 */ 1002 pri = VLAN_PRI(mhi.mhi_tci); 1003 if (mhi.mhi_istagged && (pri == 0) && (vid == VLAN_ID_NONE)) 1004 goto discard; 1005 1006 /* 1007 * Update the priority bits to the per-stream priority if 1008 * priority is not set in the packet. Update the VID for 1009 * packets on a VLAN stream. 1010 */ 1011 pri = (pri == 0) ? dsp->ds_pri : 0; 1012 if ((pri != 0) || (dsp->ds_vid != VLAN_ID_NONE)) { 1013 if ((newmp = i_dld_ether_header_update_tag(mp, 1014 pri, dsp->ds_vid)) == NULL) { 1015 goto discard; 1016 } 1017 mp = newmp; 1018 } 1019 } 1020 1021 dld_tx_single(dsp, mp); 1022 return; 1023 1024 discard: 1025 /* TODO: bump kstat? */ 1026 freemsg(mp); 1027 } 1028 1029 /* 1030 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 1031 */ 1032 int 1033 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 1034 { 1035 int err; 1036 const char *drvname; 1037 char name[MAXNAMELEN]; 1038 dls_channel_t dc; 1039 uint_t addr_length; 1040 1041 ASSERT(dsp->ds_dc == NULL); 1042 1043 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 1044 return (EINVAL); 1045 1046 (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); 1047 1048 if (strcmp(drvname, "aggr") != 0 && strcmp(drvname, "vnic") != 0 && 1049 qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 1050 return (EINVAL); 1051 1052 /* 1053 * Open a channel. 1054 */ 1055 if ((err = dls_open(name, &dc)) != 0) { 1056 (void) qassociate(dsp->ds_wq, -1); 1057 return (err); 1058 } 1059 1060 /* 1061 * Cache the MAC interface handle, a pointer to the immutable MAC 1062 * information and the current and 'factory' MAC address. 
1063 */ 1064 dsp->ds_mh = dls_mac(dc); 1065 dsp->ds_mip = mac_info(dsp->ds_mh); 1066 1067 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1068 1069 addr_length = dsp->ds_mip->mi_addr_length; 1070 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 1071 1072 /* 1073 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 1074 * a non-VLAN interface). 1075 */ 1076 dsp->ds_vid = dls_vid(dc); 1077 1078 /* 1079 * Set the default packet priority. 1080 */ 1081 dsp->ds_pri = 0; 1082 1083 /* 1084 * Add a notify function so that the we get updates from the MAC. 1085 */ 1086 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 1087 1088 dsp->ds_ppa = ppa; 1089 dsp->ds_dc = dc; 1090 dsp->ds_dlstate = DL_UNBOUND; 1091 1092 return (0); 1093 } 1094 1095 /* 1096 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 1097 * from close(2) for style 2. 1098 */ 1099 void 1100 dld_str_detach(dld_str_t *dsp) 1101 { 1102 ASSERT(dsp->ds_thr == 0); 1103 1104 /* 1105 * Remove the notify function. 1106 */ 1107 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 1108 1109 /* 1110 * Disable the capabilities and clear the promisc flag. 1111 */ 1112 ASSERT(!dsp->ds_polling); 1113 ASSERT(!dsp->ds_soft_ring); 1114 dld_capabilities_disable(dsp); 1115 dsp->ds_promisc = 0; 1116 1117 /* 1118 * Clear LSO flags. 1119 */ 1120 dsp->ds_lso = B_FALSE; 1121 dsp->ds_lso_max = 0; 1122 1123 /* 1124 * Close the channel. 1125 */ 1126 dls_close(dsp->ds_dc); 1127 dsp->ds_ppa = (t_uscalar_t)-1; 1128 dsp->ds_dc = NULL; 1129 dsp->ds_mh = NULL; 1130 1131 (void) qassociate(dsp->ds_wq, -1); 1132 1133 /* 1134 * Re-initialize the DLPI state machine. 1135 */ 1136 dsp->ds_dlstate = DL_UNATTACHED; 1137 1138 } 1139 1140 /* 1141 * This function is only called for VLAN streams. In raw mode, we strip VLAN 1142 * tags before sending packets up to the DLS clients, with the exception of 1143 * special priority tagged packets, in that case, we set the VID to 0. 
 * mp must be a VLAN tagged packet.
 *
 * Returns the (possibly reallocated) message, or NULL if a private copy
 * was needed and could not be allocated; in that case the caller still
 * owns and must free the original mp.
 */
static mblk_t *
i_dld_ether_header_strip_tag(mblk_t *mp)
{
	mblk_t *newmp;
	struct ether_vlan_header *evhp;
	uint16_t tci, new_tci;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));

	/*
	 * The header is modified in place, so take a private copy if
	 * anyone else holds a reference to this message.
	 */
	if (DB_REF(mp) > 1) {
		newmp = copymsg(mp);
		if (newmp == NULL)
			return (NULL);
		freemsg(mp);
		mp = newmp;
	}
	evhp = (struct ether_vlan_header *)mp->b_rptr;

	tci = ntohs(evhp->ether_tci);
	if (VLAN_PRI(tci) == 0) {
		/*
		 * Priority is 0, strip the tag.
		 * Slide the two MAC addresses forward over the 4-byte tag.
		 */
		ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
		mp->b_rptr += VLAN_TAGSZ;
	} else {
		/*
		 * Priority is not 0, update the VID to 0.
		 */
		new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
		evhp->ether_tci = htons(new_tci);
	}
	return (mp);
}

/*
 * Raw mode receive function.
 *
 * Walks the b_next chain, restores the MAC header in front of each packet,
 * optionally "uncooks" the header via the MAC plugin and strips VLAN tags
 * for VLAN streams, then passes each packet upstream (dropping it if the
 * read queue is flow-controlled).
 */
/*ARGSUSED*/
void
dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *next, *newmp;

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the MAC header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
		mp->b_rptr -= mhip->mhi_hdrsize;

		/*
		 * Certain MAC type plugins provide an illusion for raw
		 * DLPI consumers. They pretend that the MAC layer is
		 * something that it's not for the benefit of observability
		 * tools. For example, mac_wifi pretends that it's Ethernet
		 * for such consumers. Here, unless native mode is enabled,
		 * we call into the MAC layer so that this illusion can be
		 * maintained. The plugin will optionally transform the MAC
		 * header here into something that can be passed up to raw
		 * consumers. The header goes from "cooked" mode to raw mode.
		 */
		if (!dsp->ds_native) {
			newmp = mac_header_uncook(dsp->ds_mh, mp);
			if (newmp == NULL) {
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Strip the VLAN tag for VLAN streams.
		 */
		if (is_ethernet && dsp->ds_vid != VLAN_ID_NONE) {
			newmp = i_dld_ether_header_strip_tag(mp);
			if (newmp == NULL) {
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Pass the packet on.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, mp);
		else
			freemsg(mp);

	next:
		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * Fast-path receive function.
 */
/*ARGSUSED*/
void
dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	mblk_t *next;
	size_t offset = 0;

	/*
	 * MAC header stripping rules:
	 * - Tagged packets:
	 *   a. VLAN streams. Strip the whole VLAN header including the tag.
	 *   b. Physical streams
	 *   - VLAN packets (non-zero VID). The stream must be either a
	 *     DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener.
	 *     Strip the Ethernet header but keep the VLAN header.
	 *   - Special tagged packets (zero VID)
	 *     * The stream is either a DL_PROMISC_SAP listener or a
	 *       ETHERTYPE_VLAN listener, strip the Ethernet header but
	 *       keep the VLAN header.
	 *     * Otherwise, strip the whole VLAN header.
	 * - Untagged packets. Strip the whole MAC header.
1281 */ 1282 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1283 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1284 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1285 offset = VLAN_TAGSZ; 1286 } 1287 1288 ASSERT(mp != NULL); 1289 do { 1290 /* 1291 * Get the pointer to the next packet in the chain and then 1292 * clear b_next before the packet gets passed on. 1293 */ 1294 next = mp->b_next; 1295 mp->b_next = NULL; 1296 1297 /* 1298 * Wind back b_rptr to point at the VLAN header. 1299 */ 1300 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset); 1301 mp->b_rptr -= offset; 1302 1303 /* 1304 * Pass the packet on. 1305 */ 1306 if (canputnext(dsp->ds_rq)) 1307 putnext(dsp->ds_rq, mp); 1308 else 1309 freemsg(mp); 1310 /* 1311 * Move on to the next packet in the chain. 1312 */ 1313 mp = next; 1314 } while (mp != NULL); 1315 } 1316 1317 /* 1318 * Default receive function (send DL_UNITDATA_IND messages). 1319 */ 1320 /*ARGSUSED*/ 1321 void 1322 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1323 mac_header_info_t *mhip) 1324 { 1325 dld_str_t *dsp = (dld_str_t *)arg; 1326 mblk_t *ud_mp; 1327 mblk_t *next; 1328 size_t offset = 0; 1329 boolean_t strip_vlan = B_TRUE; 1330 1331 /* 1332 * See MAC header stripping rules in the dld_str_rx_fastpath() function. 1333 */ 1334 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1335 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1336 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1337 offset = VLAN_TAGSZ; 1338 strip_vlan = B_FALSE; 1339 } 1340 1341 ASSERT(mp != NULL); 1342 do { 1343 /* 1344 * Get the pointer to the next packet in the chain and then 1345 * clear b_next before the packet gets passed on. 1346 */ 1347 next = mp->b_next; 1348 mp->b_next = NULL; 1349 1350 /* 1351 * Wind back b_rptr to point at the MAC header. 1352 */ 1353 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1354 mp->b_rptr -= mhip->mhi_hdrsize; 1355 1356 /* 1357 * Create the DL_UNITDATA_IND M_PROTO. 
1358 */ 1359 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) { 1360 freemsgchain(mp); 1361 return; 1362 } 1363 1364 /* 1365 * Advance b_rptr to point at the payload (or the VLAN header). 1366 */ 1367 mp->b_rptr += (mhip->mhi_hdrsize - offset); 1368 1369 /* 1370 * Prepend the DL_UNITDATA_IND. 1371 */ 1372 ud_mp->b_cont = mp; 1373 1374 /* 1375 * Send the message. 1376 */ 1377 if (canputnext(dsp->ds_rq)) 1378 putnext(dsp->ds_rq, ud_mp); 1379 else 1380 freemsg(ud_mp); 1381 1382 /* 1383 * Move on to the next packet in the chain. 1384 */ 1385 mp = next; 1386 } while (mp != NULL); 1387 } 1388 1389 /* 1390 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1391 * current state of the interface. 1392 */ 1393 void 1394 dld_str_notify_ind(dld_str_t *dsp) 1395 { 1396 mac_notify_type_t type; 1397 1398 for (type = 0; type < MAC_NNOTE; type++) 1399 str_notify(dsp, type); 1400 } 1401 1402 typedef struct dl_unitdata_ind_wrapper { 1403 dl_unitdata_ind_t dl_unitdata; 1404 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1405 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1406 } dl_unitdata_ind_wrapper_t; 1407 1408 /* 1409 * Create a DL_UNITDATA_IND M_PROTO message. 1410 */ 1411 static mblk_t * 1412 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan) 1413 { 1414 mblk_t *nmp; 1415 dl_unitdata_ind_wrapper_t *dlwp; 1416 dl_unitdata_ind_t *dlp; 1417 mac_header_info_t mhi; 1418 uint_t addr_length; 1419 uint8_t *daddr; 1420 uint8_t *saddr; 1421 1422 /* 1423 * Get the packet header information. 1424 */ 1425 if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0) 1426 return (NULL); 1427 1428 /* 1429 * Allocate a message large enough to contain the wrapper structure 1430 * defined above. 
	 */
	if ((nmp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_unitdata_ind_wrapper_t), M_PROTO,
	    DL_UNITDATA_IND)) == NULL)
		return (NULL);

	dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr;

	dlp = &(dlwp->dl_unitdata);
	ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr);
	ASSERT(dlp->dl_primitive == DL_UNITDATA_IND);

	/*
	 * Copy in the destination address.
	 */
	addr_length = dsp->ds_mip->mi_addr_length;
	daddr = dlwp->dl_dest_addr;
	dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp;
	bcopy(mhi.mhi_daddr, daddr, addr_length);

	/*
	 * Set the destination DLSAP to the SAP value encoded in the packet.
	 * When a tag is being kept (strip_vlan is false) the SAP reported
	 * is ETHERTYPE_VLAN, matching the header the consumer will see.
	 */
	if (mhi.mhi_istagged && !strip_vlan)
		*(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN;
	else
		*(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap;
	dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t);

	/*
	 * If the destination address was multicast or broadcast then the
	 * dl_group_address field should be non-zero.
	 */
	dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) ||
	    (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST);

	/*
	 * Copy in the source address if one exists. Some MAC types (DL_IB
	 * for example) may not have access to source information.
	 */
	if (mhi.mhi_saddr == NULL) {
		dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0;
	} else {
		saddr = dlwp->dl_src_addr;
		dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp;
		bcopy(mhi.mhi_saddr, saddr, addr_length);

		/*
		 * Set the source DLSAP to the packet ethertype.
		 */
		*(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap;
		dlp->dl_src_addr_length = addr_length + sizeof (uint16_t);
	}

	return (nmp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
 *
 * No-op unless the consumer enabled this notification via DL_NOTIFY_REQ
 * (tracked in ds_notifications); likewise for the str_notify_*() below.
 */
static void
str_notify_promisc_on_phys(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
 */
static void
str_notify_promisc_off_phys(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR
 *
 * The notification carries the new address followed by a 16-bit SAP.
 */
static void
str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;
	uint_t		addr_length;
	uint16_t	ethertype;

	if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR))
		return;

	addr_length = dsp->ds_mip->mi_addr_length;
	if ((mp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t
	    *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PHYS_ADDR;
	dlip->dl_data = DL_CURR_PHYS_ADDR;
	dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
	dlip->dl_addr_length = addr_length + sizeof (uint16_t);

	bcopy(addr, &dlip[1], addr_length);

	/* SAPs below ETHERTYPE_802_MIN are 802.2 lengths, not ethertypes. */
	ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap;
	*(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_LINK_UP
 */
static void
str_notify_link_up(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_LINK_UP))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_LINK_UP;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN
 */
static void
str_notify_link_down(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_LINK_DOWN;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_SPEED
 *
 * 'speed' is carried in dl_data (units chosen by the caller; see the
 * MAC_NOTE_LINK handler which passes kbps).
 */
static void
str_notify_speed(dld_str_t *dsp, uint32_t speed)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_SPEED))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_SPEED;
	dlip->dl_data = speed;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG
 */
static void
str_notify_capab_reneg(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_CAPAB_RENEG;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH
 */
static void
str_notify_fastpath_flush(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH;

	qreply(dsp->ds_wq, mp);
}

/*
 * MAC notification callback.
 *
 * Translates MAC-layer events into DLPI notifications (or local state
 * updates) for this stream.
 */
static void
str_notify(void *arg, mac_notify_type_t type)
{
	dld_str_t	*dsp = (dld_str_t *)arg;
	queue_t		*q = dsp->ds_wq;

	switch (type) {
	case MAC_NOTE_TX:
		/* TX resources freed up; kick the service routine. */
		qenable(q);
		break;

	case MAC_NOTE_DEVPROMISC:
		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC))
			str_notify_promisc_on_phys(dsp);
		else
			str_notify_promisc_off_phys(dsp);
		break;

	case MAC_NOTE_PROMISC:
		/* No DLPI notification corresponds to this event. */
		break;

	case MAC_NOTE_UNICST:
		/*
		 * This notification is sent whenever the MAC unicast address
		 * changes. We need to re-cache the address.
		 */
		mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr);

		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_phys_addr(dsp, dsp->ds_curr_addr);
		break;

	case MAC_NOTE_LINK:
		/*
		 * This notification is sent every time the MAC driver
		 * updates the link state.
		 */
		switch (mac_link_get(dsp->ds_mh)) {
		case LINK_STATE_UP: {
			uint64_t speed;
			/*
			 * The link is up so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_up(dsp);

			/* IFSPEED is in bits/s; DL_NOTE_SPEED wants kbps. */
			speed = mac_stat_get(dsp->ds_mh, MAC_STAT_IFSPEED);
			str_notify_speed(dsp, (uint32_t)(speed / 1000ull));
			break;
		}
		case LINK_STATE_DOWN:
			/*
			 * The link is down so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_down(dsp);
			break;

		default:
			break;
		}
		break;

	case MAC_NOTE_RESOURCE:
	case MAC_NOTE_VNIC:
		/*
		 * This notification is sent whenever the MAC resources
		 * change or capabilities change. We need to renegotiate
		 * the capabilities. Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_capab_reneg(dsp);
		break;

	case MAC_NOTE_FASTPATH_FLUSH:
		str_notify_fastpath_flush(dsp);
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}
}

/*
 * Enqueue one or more messages to the transmit queue.
 * Caller specifies the insertion position (head/tail).
 */
void
dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert)
{
	mblk_t	*tail;
	queue_t	*q = dsp->ds_wq;
	uint_t	cnt, msgcnt;
	uint_t	tot_cnt, tot_msgcnt;

	ASSERT(DB_TYPE(mp) == M_DATA);
	/* Calculate total size and count of the packet(s) */
	for (tail = mp, cnt = msgdsize(mp), msgcnt = 1;
	    tail->b_next != NULL; tail = tail->b_next) {
		ASSERT(DB_TYPE(tail->b_next) == M_DATA);
		cnt += msgdsize(tail->b_next);
		msgcnt++;
	}

	mutex_enter(&dsp->ds_tx_list_lock);
	/*
	 * If the queue depth would exceed the allowed threshold, drop
	 * new packet(s) and drain those already in the queue.
	 */
	tot_cnt = dsp->ds_tx_cnt + cnt;
	tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt;

	/*
	 * Head-inserted packets (re-queued after a failed transmit) are
	 * never dropped, so the limit check applies to tail inserts only.
	 */
	if (!head_insert &&
	    (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) {
		ASSERT(dsp->ds_tx_qbusy);
		mutex_exit(&dsp->ds_tx_list_lock);
		freemsgchain(mp);
		goto done;
	}

	/* Update the queue size parameters */
	dsp->ds_tx_cnt = tot_cnt;
	dsp->ds_tx_msgcnt = tot_msgcnt;

	/*
	 * If the transmit queue is currently empty and we are
	 * about to deposit the packet(s) there, switch mode to
	 * "busy" and raise flow-control condition.
	 */
	if (!dsp->ds_tx_qbusy) {
		dsp->ds_tx_qbusy = B_TRUE;
		/* Put the flow-control message on the write queue to mark it full. */
		ASSERT(dsp->ds_tx_flow_mp != NULL);
		(void) putq(q, dsp->ds_tx_flow_mp);
		dsp->ds_tx_flow_mp = NULL;
	}

	if (!head_insert) {
		/* Tail insertion */
		if (dsp->ds_tx_list_head == NULL)
			dsp->ds_tx_list_head = mp;
		else
			dsp->ds_tx_list_tail->b_next = mp;
		dsp->ds_tx_list_tail = tail;
	} else {
		/* Head insertion */
		tail->b_next = dsp->ds_tx_list_head;
		if (dsp->ds_tx_list_head == NULL)
			dsp->ds_tx_list_tail = tail;
		dsp->ds_tx_list_head = mp;
	}
	mutex_exit(&dsp->ds_tx_list_lock);
done:
	/* Schedule service thread to drain the transmit queue */
	if (!head_insert)
		qenable(q);
}

/*
 * Discard everything on the transmit queue and, if the stream was in the
 * "busy" flow-controlled state, reclaim the flow-control message and
 * return to "not busy" mode.
 */
void
dld_tx_flush(dld_str_t *dsp)
{
	mutex_enter(&dsp->ds_tx_list_lock);
	if (dsp->ds_tx_list_head != NULL) {
		freemsgchain(dsp->ds_tx_list_head);
		dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL;
		dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0;
		if (dsp->ds_tx_qbusy) {
			dsp->ds_tx_flow_mp = getq(dsp->ds_wq);
			ASSERT(dsp->ds_tx_flow_mp != NULL);
			dsp->ds_tx_qbusy = B_FALSE;
		}
	}
	mutex_exit(&dsp->ds_tx_list_lock);
}

/*
 * Process an M_IOCTL message.
1877 */ 1878 static void 1879 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1880 { 1881 uint_t cmd; 1882 1883 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1884 ASSERT(dsp->ds_type == DLD_DLPI); 1885 1886 switch (cmd) { 1887 case DLIOCNATIVE: 1888 ioc_native(dsp, mp); 1889 break; 1890 case DLIOCRAW: 1891 ioc_raw(dsp, mp); 1892 break; 1893 case DLIOCHDRINFO: 1894 ioc_fast(dsp, mp); 1895 break; 1896 default: 1897 ioc(dsp, mp); 1898 } 1899 } 1900 1901 /* 1902 * DLIOCNATIVE 1903 */ 1904 static void 1905 ioc_native(dld_str_t *dsp, mblk_t *mp) 1906 { 1907 queue_t *q = dsp->ds_wq; 1908 const mac_info_t *mip = dsp->ds_mip; 1909 1910 rw_enter(&dsp->ds_lock, RW_WRITER); 1911 1912 /* 1913 * Native mode can be enabled if it's disabled and if the 1914 * native media type is different. 1915 */ 1916 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia) 1917 dsp->ds_native = B_TRUE; 1918 1919 rw_exit(&dsp->ds_lock); 1920 1921 if (dsp->ds_native) 1922 miocack(q, mp, 0, mip->mi_nativemedia); 1923 else 1924 miocnak(q, mp, 0, ENOTSUP); 1925 } 1926 1927 /* 1928 * DLIOCRAW 1929 */ 1930 static void 1931 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1932 { 1933 queue_t *q = dsp->ds_wq; 1934 1935 rw_enter(&dsp->ds_lock, RW_WRITER); 1936 if (dsp->ds_polling || dsp->ds_soft_ring) { 1937 rw_exit(&dsp->ds_lock); 1938 miocnak(q, mp, 0, EPROTO); 1939 return; 1940 } 1941 1942 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 1943 /* 1944 * Set the receive callback. 1945 */ 1946 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); 1947 } 1948 1949 /* 1950 * Note that raw mode is enabled. 
1951 */ 1952 dsp->ds_mode = DLD_RAW; 1953 1954 rw_exit(&dsp->ds_lock); 1955 miocack(q, mp, 0, 0); 1956 } 1957 1958 /* 1959 * DLIOCHDRINFO 1960 */ 1961 static void 1962 ioc_fast(dld_str_t *dsp, mblk_t *mp) 1963 { 1964 dl_unitdata_req_t *dlp; 1965 off_t off; 1966 size_t len; 1967 const uint8_t *addr; 1968 uint16_t sap; 1969 mblk_t *nmp; 1970 mblk_t *hmp; 1971 uint_t addr_length; 1972 queue_t *q = dsp->ds_wq; 1973 int err; 1974 dls_channel_t dc; 1975 1976 if (dld_opt & DLD_OPT_NO_FASTPATH) { 1977 err = ENOTSUP; 1978 goto failed; 1979 } 1980 1981 /* 1982 * DLIOCHDRINFO should only come from IP. The one initiated from 1983 * user-land should not be allowed. 1984 */ 1985 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) { 1986 err = EINVAL; 1987 goto failed; 1988 } 1989 1990 nmp = mp->b_cont; 1991 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 1992 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 1993 dlp->dl_primitive != DL_UNITDATA_REQ)) { 1994 err = EINVAL; 1995 goto failed; 1996 } 1997 1998 off = dlp->dl_dest_addr_offset; 1999 len = dlp->dl_dest_addr_length; 2000 2001 if (!MBLKIN(nmp, off, len)) { 2002 err = EINVAL; 2003 goto failed; 2004 } 2005 2006 rw_enter(&dsp->ds_lock, RW_READER); 2007 if (dsp->ds_dlstate != DL_IDLE) { 2008 rw_exit(&dsp->ds_lock); 2009 err = ENOTSUP; 2010 goto failed; 2011 } 2012 2013 addr_length = dsp->ds_mip->mi_addr_length; 2014 if (len != addr_length + sizeof (uint16_t)) { 2015 rw_exit(&dsp->ds_lock); 2016 err = EINVAL; 2017 goto failed; 2018 } 2019 2020 addr = nmp->b_rptr + off; 2021 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 2022 dc = dsp->ds_dc; 2023 2024 if ((hmp = dls_header(dc, addr, sap, 0, NULL)) == NULL) { 2025 rw_exit(&dsp->ds_lock); 2026 err = ENOMEM; 2027 goto failed; 2028 } 2029 2030 /* 2031 * This is a performance optimization. We originally entered 2032 * as reader and only become writer upon transitioning into 2033 * the DLD_FASTPATH mode for the first time. 
Otherwise we 2034 * stay as reader and return the fast-path header to IP. 2035 */ 2036 if (dsp->ds_mode != DLD_FASTPATH) { 2037 if (!rw_tryupgrade(&dsp->ds_lock)) { 2038 rw_exit(&dsp->ds_lock); 2039 rw_enter(&dsp->ds_lock, RW_WRITER); 2040 2041 /* 2042 * State may have changed before we re-acquired 2043 * the writer lock in case the upgrade failed. 2044 */ 2045 if (dsp->ds_dlstate != DL_IDLE) { 2046 rw_exit(&dsp->ds_lock); 2047 err = ENOTSUP; 2048 goto failed; 2049 } 2050 } 2051 2052 /* 2053 * Set the receive callback (unless polling is enabled). 2054 */ 2055 if (!dsp->ds_polling && !dsp->ds_soft_ring) 2056 dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); 2057 2058 /* 2059 * Note that fast-path mode is enabled. 2060 */ 2061 dsp->ds_mode = DLD_FASTPATH; 2062 } 2063 rw_exit(&dsp->ds_lock); 2064 2065 freemsg(nmp->b_cont); 2066 nmp->b_cont = hmp; 2067 2068 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 2069 return; 2070 failed: 2071 miocnak(q, mp, 0, err); 2072 } 2073 2074 /* 2075 * Catch-all handler. 2076 */ 2077 static void 2078 ioc(dld_str_t *dsp, mblk_t *mp) 2079 { 2080 queue_t *q = dsp->ds_wq; 2081 mac_handle_t mh; 2082 2083 rw_enter(&dsp->ds_lock, RW_READER); 2084 if (dsp->ds_dlstate == DL_UNATTACHED) { 2085 rw_exit(&dsp->ds_lock); 2086 miocnak(q, mp, 0, EINVAL); 2087 return; 2088 } 2089 mh = dsp->ds_mh; 2090 ASSERT(mh != NULL); 2091 rw_exit(&dsp->ds_lock); 2092 mac_ioctl(mh, q, mp); 2093 } 2094