1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Data-Link Driver 30 */ 31 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/strsubr.h> 35 #include <sys/atomic.h> 36 #include <sys/mkdev.h> 37 #include <sys/vlan.h> 38 #include <sys/dld.h> 39 #include <sys/dld_impl.h> 40 #include <sys/dls_impl.h> 41 #include <inet/common.h> 42 43 static int str_constructor(void *, void *, int); 44 static void str_destructor(void *, void *); 45 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); 46 static void str_notify_promisc_on_phys(dld_str_t *); 47 static void str_notify_promisc_off_phys(dld_str_t *); 48 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 49 static void str_notify_link_up(dld_str_t *); 50 static void str_notify_link_down(dld_str_t *); 51 static void str_notify_capab_reneg(dld_str_t *); 52 static void str_notify_speed(dld_str_t *, uint32_t); 53 static void str_notify(void *, mac_notify_type_t); 54 55 static void ioc_native(dld_str_t *, mblk_t *); 56 static void 
ioc_raw(dld_str_t *, mblk_t *); 57 static void ioc_fast(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static minor_t dld_minor_hold(boolean_t); 61 static void dld_minor_rele(minor_t); 62 static void str_mdata_raw_put(dld_str_t *, mblk_t *); 63 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t); 64 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *); 65 66 static uint32_t str_count; 67 static kmem_cache_t *str_cachep; 68 static vmem_t *minor_arenap; 69 static uint32_t minor_count; 70 static mod_hash_t *str_hashp; 71 72 #define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) 73 #define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) 74 75 #define STR_HASHSZ 64 76 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 77 78 /* 79 * Some notes on entry points, flow-control, queueing and locking: 80 * 81 * This driver exports the traditional STREAMS put entry point as well as 82 * the non-STREAMS fast-path transmit routine which is provided to IP via 83 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 84 * and data operations, while the fast-path routine deals only with M_DATA 85 * fast-path packets. Regardless of the entry point, all outbound packets 86 * will end up in dld_tx_single(), where they will be delivered to the MAC 87 * driver. 88 * 89 * The transmit logic operates in two modes: a "not busy" mode where the 90 * packets will be delivered to the MAC for a send attempt, or "busy" mode 91 * where they will be enqueued in the internal queue because of flow-control. 92 * Flow-control happens when the MAC driver indicates the packets couldn't 93 * be transmitted due to lack of resources (e.g. running out of descriptors). 94 * In such case, the driver will place a dummy message on its write-side 95 * STREAMS queue so that the queue is marked as "full". 
Any subsequent 96 * packets arriving at the driver will be enqueued in the internal queue, 97 * which is drained in the context of the service thread that gets scheduled 98 * whenever the driver is in the "busy" mode. When all packets have been 99 * successfully delivered by MAC and the internal queue is empty, it will 100 * transition to the "not busy" mode by removing the dummy message from the 101 * write-side STREAMS queue; in effect this will trigger backenabling. 102 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due 103 * to the above reasons. 104 * 105 * The driver implements an internal transmit queue independent of STREAMS. 106 * This allows for flexibility and provides a fast enqueue/dequeue mechanism 107 * compared to the putq() and get() STREAMS interfaces. The only putq() and 108 * getq() operations done by the driver are those related to placing and 109 * removing the dummy message to/from the write-side STREAMS queue for flow- 110 * control purposes. 111 * 112 * Locking is done independent of STREAMS due to the driver being fully MT. 113 * Threads entering the driver (either from put or service entry points) 114 * will most likely be readers, with the exception of a few writer cases 115 * such those handling DLPI attach/detach/bind/unbind/etc. or any of the 116 * DLD-related ioctl requests. The DLPI detach case is special, because 117 * it involves freeing resources and therefore must be single-threaded. 118 * Unfortunately the readers/writers lock can't be used to protect against 119 * it, because the lock is dropped prior to the driver calling places where 120 * putnext() may be invoked, and such places may depend on those resources 121 * to exist. Because of this, the driver always completes the DLPI detach 122 * process when there are no other threads running in the driver. 
This is 123 * done by keeping track of the number of threads, such that the the last 124 * thread leaving the driver will finish the pending DLPI detach operation. 125 */ 126 127 /* 128 * dld_max_q_count is the queue depth threshold used to limit the number of 129 * outstanding packets or bytes allowed in the queue; once this limit is 130 * reached the driver will free any incoming ones until the queue depth 131 * drops below the threshold. 132 * 133 * This buffering is provided to accomodate clients which do not employ 134 * their own buffering scheme, and to handle occasional packet bursts. 135 * Clients which handle their own buffering will receive positive feedback 136 * from this driver as soon as it transitions into the "busy" state, i.e. 137 * when the queue is initially filled up; they will get backenabled once 138 * the queue is empty. 139 * 140 * The value chosen here is rather arbitrary; in future some intelligent 141 * heuristics may be involved which could take into account the hardware's 142 * transmit ring size, etc. 143 */ 144 uint_t dld_max_q_count = (16 * 1024 *1024); 145 146 /* 147 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular 148 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that 149 * match dev_t. If a stream is found and it is attached, its dev_info_t * 150 * is returned. 151 */ 152 typedef struct i_dld_str_state_s { 153 major_t ds_major; 154 minor_t ds_minor; 155 dev_info_t *ds_dip; 156 } i_dld_str_state_t; 157 158 /* ARGSUSED */ 159 static uint_t 160 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 161 { 162 i_dld_str_state_t *statep = arg; 163 dld_str_t *dsp = (dld_str_t *)val; 164 165 if (statep->ds_major != dsp->ds_major) 166 return (MH_WALK_CONTINUE); 167 168 ASSERT(statep->ds_minor != 0); 169 170 /* 171 * Access to ds_ppa and ds_mh need to be protected by ds_lock. 
172 */ 173 rw_enter(&dsp->ds_lock, RW_READER); 174 if (statep->ds_minor <= DLD_MAX_MINOR) { 175 /* 176 * Style 1: minor can be derived from the ppa. we 177 * continue to walk until we find a matching stream 178 * in attached state. 179 */ 180 if (statep->ds_minor == DLS_PPA2MINOR(dsp->ds_ppa) && 181 dsp->ds_mh != NULL) { 182 statep->ds_dip = mac_devinfo_get(dsp->ds_mh); 183 rw_exit(&dsp->ds_lock); 184 return (MH_WALK_TERMINATE); 185 } 186 } else { 187 /* 188 * Clone: a clone minor is unique. we can terminate the 189 * walk if we find a matching stream -- even if we fail 190 * to obtain the devinfo. 191 */ 192 if (statep->ds_minor == dsp->ds_minor) { 193 if (dsp->ds_mh != NULL) 194 statep->ds_dip = mac_devinfo_get(dsp->ds_mh); 195 rw_exit(&dsp->ds_lock); 196 return (MH_WALK_TERMINATE); 197 } 198 } 199 rw_exit(&dsp->ds_lock); 200 return (MH_WALK_CONTINUE); 201 } 202 203 static dev_info_t * 204 dld_finddevinfo(dev_t dev) 205 { 206 i_dld_str_state_t state; 207 208 state.ds_minor = getminor(dev); 209 state.ds_major = getmajor(dev); 210 state.ds_dip = NULL; 211 212 if (state.ds_minor == 0) 213 return (NULL); 214 215 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 216 return (state.ds_dip); 217 } 218 219 220 /* 221 * devo_getinfo: getinfo(9e) 222 */ 223 /*ARGSUSED*/ 224 int 225 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 226 { 227 dev_info_t *devinfo; 228 minor_t minor = getminor((dev_t)arg); 229 int rc = DDI_FAILURE; 230 231 switch (cmd) { 232 case DDI_INFO_DEVT2DEVINFO: 233 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 234 *(dev_info_t **)resp = devinfo; 235 rc = DDI_SUCCESS; 236 } 237 break; 238 case DDI_INFO_DEVT2INSTANCE: 239 if (minor > 0 && minor <= DLD_MAX_MINOR) { 240 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor); 241 rc = DDI_SUCCESS; 242 } else if (minor > DLD_MAX_MINOR && 243 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 244 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); 245 rc = DDI_SUCCESS; 246 } 247 
break; 248 } 249 return (rc); 250 } 251 252 /* 253 * qi_qopen: open(9e) 254 */ 255 /*ARGSUSED*/ 256 int 257 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 258 { 259 dld_str_t *dsp; 260 major_t major; 261 minor_t minor; 262 int err; 263 264 if (sflag == MODOPEN) 265 return (ENOTSUP); 266 267 /* 268 * This is a cloning driver and therefore each queue should only 269 * ever get opened once. 270 */ 271 if (rq->q_ptr != NULL) 272 return (EBUSY); 273 274 major = getmajor(*devp); 275 minor = getminor(*devp); 276 if (minor > DLD_MAX_MINOR) 277 return (ENODEV); 278 279 /* 280 * Create a new dld_str_t for the stream. This will grab a new minor 281 * number that will be handed back in the cloned dev_t. Creation may 282 * fail if we can't allocate the dummy mblk used for flow-control. 283 */ 284 dsp = dld_str_create(rq, DLD_DLPI, major, 285 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 286 if (dsp == NULL) 287 return (ENOSR); 288 289 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 290 if (minor != 0) { 291 /* 292 * Style 1 open 293 */ 294 295 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 296 goto failed; 297 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 298 } else { 299 (void) qassociate(rq, -1); 300 } 301 302 /* 303 * Enable the queue srv(9e) routine. 304 */ 305 qprocson(rq); 306 307 /* 308 * Construct a cloned dev_t to hand back. 309 */ 310 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 311 return (0); 312 313 failed: 314 dld_str_destroy(dsp); 315 return (err); 316 } 317 318 /* 319 * qi_qclose: close(9e) 320 */ 321 int 322 dld_close(queue_t *rq) 323 { 324 dld_str_t *dsp = rq->q_ptr; 325 326 /* 327 * Wait until pending requests are processed. 328 */ 329 mutex_enter(&dsp->ds_thr_lock); 330 while (dsp->ds_pending_cnt > 0) 331 cv_wait(&dsp->ds_pending_cv, &dsp->ds_thr_lock); 332 mutex_exit(&dsp->ds_thr_lock); 333 334 /* 335 * Disable the queue srv(9e) routine. 
336 */ 337 qprocsoff(rq); 338 339 /* 340 * At this point we can not be entered by any threads via STREAMS 341 * or the direct call interface, which is available only to IP. 342 * After the interface is unplumbed, IP wouldn't have any reference 343 * to this instance, and therefore we are now effectively single 344 * threaded and don't require any lock protection. Flush all 345 * pending packets which are sitting in the transmit queue. 346 */ 347 ASSERT(dsp->ds_thr == 0); 348 dld_tx_flush(dsp); 349 350 /* 351 * This stream was open to a provider node. Check to see 352 * if it has been cleanly shut down. 353 */ 354 if (dsp->ds_dlstate != DL_UNATTACHED) { 355 /* 356 * The stream is either open to a style 1 provider or 357 * this is not clean shutdown. Detach from the PPA. 358 * (This is still ok even in the style 1 case). 359 */ 360 dld_str_detach(dsp); 361 } 362 363 dld_str_destroy(dsp); 364 return (0); 365 } 366 367 /* 368 * qi_qputp: put(9e) 369 */ 370 void 371 dld_wput(queue_t *wq, mblk_t *mp) 372 { 373 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 374 375 DLD_ENTER(dsp); 376 377 switch (DB_TYPE(mp)) { 378 case M_DATA: 379 rw_enter(&dsp->ds_lock, RW_READER); 380 if (dsp->ds_dlstate != DL_IDLE || 381 dsp->ds_mode == DLD_UNITDATA) { 382 freemsg(mp); 383 } else if (dsp->ds_mode == DLD_FASTPATH) { 384 str_mdata_fastpath_put(dsp, mp); 385 } else if (dsp->ds_mode == DLD_RAW) { 386 str_mdata_raw_put(dsp, mp); 387 } 388 rw_exit(&dsp->ds_lock); 389 break; 390 case M_PROTO: 391 case M_PCPROTO: 392 dld_proto(dsp, mp); 393 break; 394 case M_IOCTL: 395 dld_ioc(dsp, mp); 396 break; 397 case M_FLUSH: 398 if (*mp->b_rptr & FLUSHW) { 399 dld_tx_flush(dsp); 400 *mp->b_rptr &= ~FLUSHW; 401 } 402 403 if (*mp->b_rptr & FLUSHR) { 404 qreply(wq, mp); 405 } else { 406 freemsg(mp); 407 } 408 break; 409 default: 410 freemsg(mp); 411 break; 412 } 413 414 DLD_EXIT(dsp); 415 } 416 417 /* 418 * qi_srvp: srv(9e) 419 */ 420 void 421 dld_wsrv(queue_t *wq) 422 { 423 mblk_t *mp; 424 dld_str_t *dsp = 
wq->q_ptr; 425 426 DLD_ENTER(dsp); 427 rw_enter(&dsp->ds_lock, RW_READER); 428 /* 429 * Grab all packets (chained via b_next) off our transmit queue 430 * and try to send them all to the MAC layer. Since the queue 431 * is independent of streams, we are able to dequeue all messages 432 * at once without looping through getq() and manually chaining 433 * them. Note that the queue size parameters (byte and message 434 * counts) are cleared as well, but we postpone the backenabling 435 * until after the MAC transmit since some packets may end up 436 * back at our transmit queue. 437 */ 438 mutex_enter(&dsp->ds_tx_list_lock); 439 if ((mp = dsp->ds_tx_list_head) == NULL) { 440 ASSERT(!dsp->ds_tx_qbusy); 441 ASSERT(dsp->ds_tx_flow_mp != NULL); 442 ASSERT(dsp->ds_tx_list_head == NULL); 443 ASSERT(dsp->ds_tx_list_tail == NULL); 444 ASSERT(dsp->ds_tx_cnt == 0); 445 ASSERT(dsp->ds_tx_msgcnt == 0); 446 mutex_exit(&dsp->ds_tx_list_lock); 447 rw_exit(&dsp->ds_lock); 448 DLD_EXIT(dsp); 449 return; 450 } 451 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 452 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 453 mutex_exit(&dsp->ds_tx_list_lock); 454 455 /* 456 * Discard packets unless we are attached and bound; note that 457 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 458 * because regardless of the mode all transmit will end up in 459 * dld_tx_single() where the packets may be queued. 460 */ 461 ASSERT(DB_TYPE(mp) == M_DATA); 462 if (dsp->ds_dlstate != DL_IDLE) { 463 freemsgchain(mp); 464 goto done; 465 } 466 467 /* 468 * Attempt to transmit one or more packets. If the MAC can't 469 * send them all, re-queue the packet(s) at the beginning of 470 * the transmit queue to avoid any re-ordering. 471 */ 472 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 473 dld_tx_enqueue(dsp, mp, B_TRUE); 474 475 done: 476 /* 477 * Grab the list lock again and check if the transmit queue is 478 * really empty; if so, lift up flow-control and backenable any 479 * writer queues. 
If the queue is not empty, schedule service 480 * thread to drain it. 481 */ 482 mutex_enter(&dsp->ds_tx_list_lock); 483 if (dsp->ds_tx_list_head == NULL) { 484 dsp->ds_tx_flow_mp = getq(wq); 485 ASSERT(dsp->ds_tx_flow_mp != NULL); 486 dsp->ds_tx_qbusy = B_FALSE; 487 } 488 mutex_exit(&dsp->ds_tx_list_lock); 489 490 rw_exit(&dsp->ds_lock); 491 DLD_EXIT(dsp); 492 } 493 494 void 495 dld_init_ops(struct dev_ops *ops, const char *name) 496 { 497 struct streamtab *stream; 498 struct qinit *rq, *wq; 499 struct module_info *modinfo; 500 501 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 502 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 503 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 504 modinfo->mi_minpsz = 0; 505 modinfo->mi_maxpsz = 64*1024; 506 modinfo->mi_hiwat = 1; 507 modinfo->mi_lowat = 0; 508 509 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 510 rq->qi_qopen = dld_open; 511 rq->qi_qclose = dld_close; 512 rq->qi_minfo = modinfo; 513 514 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 515 wq->qi_putp = (pfi_t)dld_wput; 516 wq->qi_srvp = (pfi_t)dld_wsrv; 517 wq->qi_minfo = modinfo; 518 519 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 520 stream->st_rdinit = rq; 521 stream->st_wrinit = wq; 522 ops->devo_cb_ops->cb_str = stream; 523 524 ops->devo_getinfo = &dld_getinfo; 525 } 526 527 void 528 dld_fini_ops(struct dev_ops *ops) 529 { 530 struct streamtab *stream; 531 struct qinit *rq, *wq; 532 struct module_info *modinfo; 533 534 stream = ops->devo_cb_ops->cb_str; 535 rq = stream->st_rdinit; 536 wq = stream->st_wrinit; 537 modinfo = rq->qi_minfo; 538 ASSERT(wq->qi_minfo == modinfo); 539 540 kmem_free(stream, sizeof (struct streamtab)); 541 kmem_free(wq, sizeof (struct qinit)); 542 kmem_free(rq, sizeof (struct qinit)); 543 kmem_free(modinfo->mi_idname, FMNAMESZ); 544 kmem_free(modinfo, sizeof (struct module_info)); 545 } 546 547 /* 548 * Initialize this module's data structures. 
549 */ 550 void 551 dld_str_init(void) 552 { 553 /* 554 * Create dld_str_t object cache. 555 */ 556 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 557 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 558 ASSERT(str_cachep != NULL); 559 560 /* 561 * Allocate a vmem arena to manage minor numbers. The range of the 562 * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal 563 * minor number). 564 */ 565 minor_arenap = vmem_create("dld_minor_arena", 566 MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, 567 VM_SLEEP | VMC_IDENTIFIER); 568 ASSERT(minor_arenap != NULL); 569 570 /* 571 * Create a hash table for maintaining dld_str_t's. 572 * The ds_minor field (the clone minor number) of a dld_str_t 573 * is used as a key for this hash table because this number is 574 * globally unique (allocated from "dld_minor_arena"). 575 */ 576 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 577 mod_hash_null_valdtor); 578 } 579 580 /* 581 * Tear down this module's data structures. 582 */ 583 int 584 dld_str_fini(void) 585 { 586 /* 587 * Make sure that there are no objects in use. 588 */ 589 if (str_count != 0) 590 return (EBUSY); 591 592 /* 593 * Check to see if there are any minor numbers still in use. 594 */ 595 if (minor_count != 0) 596 return (EBUSY); 597 598 /* 599 * Destroy object cache. 600 */ 601 kmem_cache_destroy(str_cachep); 602 vmem_destroy(minor_arenap); 603 mod_hash_destroy_idhash(str_hashp); 604 return (0); 605 } 606 607 /* 608 * Create a new dld_str_t object. 609 */ 610 dld_str_t * 611 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 612 { 613 dld_str_t *dsp; 614 int err; 615 616 /* 617 * Allocate an object from the cache. 618 */ 619 atomic_add_32(&str_count, 1); 620 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 621 622 /* 623 * Allocate the dummy mblk for flow-control. 
624 */ 625 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 626 if (dsp->ds_tx_flow_mp == NULL) { 627 kmem_cache_free(str_cachep, dsp); 628 atomic_add_32(&str_count, -1); 629 return (NULL); 630 } 631 dsp->ds_type = type; 632 dsp->ds_major = major; 633 dsp->ds_style = style; 634 635 /* 636 * Initialize the queue pointers. 637 */ 638 ASSERT(RD(rq) == rq); 639 dsp->ds_rq = rq; 640 dsp->ds_wq = WR(rq); 641 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 642 643 /* 644 * We want explicit control over our write-side STREAMS queue 645 * where the dummy mblk gets added/removed for flow-control. 646 */ 647 noenable(WR(rq)); 648 649 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor), 650 (mod_hash_val_t)dsp); 651 ASSERT(err == 0); 652 return (dsp); 653 } 654 655 /* 656 * Destroy a dld_str_t object. 657 */ 658 void 659 dld_str_destroy(dld_str_t *dsp) 660 { 661 queue_t *rq; 662 queue_t *wq; 663 mod_hash_val_t val; 664 /* 665 * Clear the queue pointers. 666 */ 667 rq = dsp->ds_rq; 668 wq = dsp->ds_wq; 669 ASSERT(wq == WR(rq)); 670 671 rq->q_ptr = wq->q_ptr = NULL; 672 dsp->ds_rq = dsp->ds_wq = NULL; 673 674 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 675 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 676 ASSERT(dsp->ds_tx_list_head == NULL); 677 ASSERT(dsp->ds_tx_list_tail == NULL); 678 ASSERT(dsp->ds_tx_cnt == 0); 679 ASSERT(dsp->ds_tx_msgcnt == 0); 680 ASSERT(!dsp->ds_tx_qbusy); 681 682 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 683 ASSERT(dsp->ds_thr == 0); 684 ASSERT(dsp->ds_pending_req == NULL); 685 686 /* 687 * Reinitialize all the flags. 688 */ 689 dsp->ds_notifications = 0; 690 dsp->ds_passivestate = DLD_UNINITIALIZED; 691 dsp->ds_mode = DLD_UNITDATA; 692 dsp->ds_native = B_FALSE; 693 694 /* 695 * Free the dummy mblk if exists. 
696 */ 697 if (dsp->ds_tx_flow_mp != NULL) { 698 freeb(dsp->ds_tx_flow_mp); 699 dsp->ds_tx_flow_mp = NULL; 700 } 701 702 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val); 703 ASSERT(dsp == (dld_str_t *)val); 704 705 /* 706 * Free the object back to the cache. 707 */ 708 kmem_cache_free(str_cachep, dsp); 709 atomic_add_32(&str_count, -1); 710 } 711 712 /* 713 * kmem_cache contructor function: see kmem_cache_create(9f). 714 */ 715 /*ARGSUSED*/ 716 static int 717 str_constructor(void *buf, void *cdrarg, int kmflags) 718 { 719 dld_str_t *dsp = buf; 720 721 bzero(buf, sizeof (dld_str_t)); 722 723 /* 724 * Allocate a new minor number. 725 */ 726 if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0) 727 return (-1); 728 729 /* 730 * Initialize the DLPI state machine. 731 */ 732 dsp->ds_dlstate = DL_UNATTACHED; 733 dsp->ds_ppa = (t_uscalar_t)-1; 734 735 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 736 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 737 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 738 cv_init(&dsp->ds_pending_cv, NULL, CV_DRIVER, NULL); 739 740 return (0); 741 } 742 743 /* 744 * kmem_cache destructor function. 745 */ 746 /*ARGSUSED*/ 747 static void 748 str_destructor(void *buf, void *cdrarg) 749 { 750 dld_str_t *dsp = buf; 751 752 /* 753 * Make sure the DLPI state machine was reset. 754 */ 755 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 756 757 /* 758 * Make sure the data-link interface was closed. 759 */ 760 ASSERT(dsp->ds_mh == NULL); 761 ASSERT(dsp->ds_dc == NULL); 762 763 /* 764 * Make sure enabled notifications are cleared. 765 */ 766 ASSERT(dsp->ds_notifications == 0); 767 768 /* 769 * Make sure polling is disabled. 770 */ 771 ASSERT(!dsp->ds_polling); 772 773 /* 774 * Release the minor number. 
775 */ 776 dld_minor_rele(dsp->ds_minor); 777 778 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 779 rw_destroy(&dsp->ds_lock); 780 781 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 782 mutex_destroy(&dsp->ds_tx_list_lock); 783 ASSERT(dsp->ds_tx_flow_mp == NULL); 784 785 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 786 mutex_destroy(&dsp->ds_thr_lock); 787 ASSERT(dsp->ds_pending_req == NULL); 788 ASSERT(dsp->ds_pending_op == NULL); 789 ASSERT(dsp->ds_pending_cnt == 0); 790 cv_destroy(&dsp->ds_pending_cv); 791 } 792 793 /* 794 * M_DATA put. Note that mp is a single message, not a chained message. 795 */ 796 void 797 dld_tx_single(dld_str_t *dsp, mblk_t *mp) 798 { 799 /* 800 * This function can be called from within dld or from an upper 801 * layer protocol (currently only tcp). If we are in the busy 802 * mode enqueue the packet(s) and return. Otherwise hand them 803 * over to the MAC driver for transmission; any remaining one(s) 804 * which didn't get sent will be queued. 805 * 806 * Note here that we don't grab the list lock prior to checking 807 * the busy flag. This is okay, because a missed transition 808 * will not cause any packet reordering for any particular TCP 809 * connection (which is single-threaded). The enqueue routine 810 * will atomically set the busy flag and schedule the service 811 * thread to run; the flag is only cleared by the service thread 812 * when there is no more packet to be transmitted. 813 */ 814 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 815 dld_tx_enqueue(dsp, mp, B_FALSE); 816 } 817 818 /* 819 * Update the priority bits and VID (may need to insert tag if mp points 820 * to an untagged packet. 821 * If vid is VLAN_ID_NONE, use the VID encoded in the packet. 
822 */ 823 static mblk_t * 824 i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid) 825 { 826 mblk_t *hmp; 827 struct ether_vlan_header *evhp; 828 struct ether_header *ehp; 829 uint16_t old_tci = 0; 830 size_t len; 831 832 ASSERT(pri != 0 || vid != VLAN_ID_NONE); 833 834 evhp = (struct ether_vlan_header *)mp->b_rptr; 835 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 836 /* 837 * Tagged packet, update the priority bits. 838 */ 839 old_tci = ntohs(evhp->ether_tci); 840 len = sizeof (struct ether_vlan_header); 841 842 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) { 843 /* 844 * In case some drivers only check the db_ref 845 * count of the first mblk, we pullup the 846 * message into a single mblk. 847 */ 848 hmp = msgpullup(mp, -1); 849 if ((hmp == NULL) || (MBLKL(hmp) < len)) { 850 freemsg(hmp); 851 return (NULL); 852 } else { 853 freemsg(mp); 854 mp = hmp; 855 } 856 } 857 858 evhp = (struct ether_vlan_header *)mp->b_rptr; 859 } else { 860 /* 861 * Untagged packet. Insert the special priority tag. 862 * First allocate a header mblk. 863 */ 864 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 865 if (hmp == NULL) 866 return (NULL); 867 868 evhp = (struct ether_vlan_header *)hmp->b_rptr; 869 ehp = (struct ether_header *)mp->b_rptr; 870 871 /* 872 * Copy the MAC addresses and typelen 873 */ 874 bcopy(ehp, evhp, (ETHERADDRL * 2)); 875 evhp->ether_type = ehp->ether_type; 876 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 877 878 hmp->b_wptr += sizeof (struct ether_vlan_header); 879 mp->b_rptr += sizeof (struct ether_header); 880 881 /* 882 * Free the original message if it's now empty. Link the 883 * rest of messages to the header message. 
884 */ 885 if (MBLKL(mp) == 0) { 886 hmp->b_cont = mp->b_cont; 887 freeb(mp); 888 } else { 889 hmp->b_cont = mp; 890 } 891 mp = hmp; 892 } 893 894 if (pri == 0) 895 pri = VLAN_PRI(old_tci); 896 if (vid == VLAN_ID_NONE) 897 vid = VLAN_ID(old_tci); 898 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid)); 899 return (mp); 900 } 901 902 /* 903 * M_DATA put (IP fast-path mode) 904 */ 905 void 906 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 907 { 908 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 909 mblk_t *newmp; 910 uint_t pri; 911 912 if (is_ethernet) { 913 /* 914 * Update the priority bits to the assigned priority. 915 */ 916 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp); 917 918 if (pri != 0) { 919 newmp = i_dld_ether_header_update_tag(mp, pri, 920 VLAN_ID_NONE); 921 if (newmp == NULL) 922 goto discard; 923 mp = newmp; 924 } 925 } 926 927 dld_tx_single(dsp, mp); 928 return; 929 930 discard: 931 /* TODO: bump kstat? */ 932 freemsg(mp); 933 } 934 935 /* 936 * M_DATA put (DLIOCRAW mode) 937 */ 938 static void 939 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 940 { 941 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 942 mblk_t *bp, *newmp; 943 size_t size; 944 mac_header_info_t mhi; 945 uint_t pri, vid; 946 947 /* 948 * Certain MAC type plugins provide an illusion for raw DLPI 949 * consumers. They pretend that the MAC layer is something that 950 * it's not for the benefit of observability tools. For example, 951 * mac_wifi pretends that it's Ethernet for such consumers. 952 * Here, unless native mode is enabled, we call into the MAC layer so 953 * that this illusion can be maintained. The plugin will optionally 954 * transform the MAC header here into something that can be passed 955 * down. The header goes from raw mode to "cooked" mode. 
956 */ 957 if (!dsp->ds_native) { 958 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL) 959 goto discard; 960 mp = newmp; 961 } 962 963 size = MBLKL(mp); 964 965 /* 966 * Check the packet is not too big and that any remaining 967 * fragment list is composed entirely of M_DATA messages. (We 968 * know the first fragment was M_DATA otherwise we could not 969 * have got here). 970 */ 971 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 972 if (DB_TYPE(bp) != M_DATA) 973 goto discard; 974 size += MBLKL(bp); 975 } 976 977 if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0) 978 goto discard; 979 980 /* 981 * If LSO is enabled, check the size against lso_max. Otherwise, 982 * compare the packet size with sdu_max. 983 */ 984 if (size > (dsp->ds_lso ? dsp->ds_lso_max : dsp->ds_mip->mi_sdu_max) 985 + mhi.mhi_hdrsize) 986 goto discard; 987 988 if (is_ethernet) { 989 /* 990 * Discard the packet if this is a VLAN stream but the VID in 991 * the packet is not correct. 992 */ 993 vid = VLAN_ID(mhi.mhi_tci); 994 if ((dsp->ds_vid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE)) 995 goto discard; 996 997 /* 998 * Discard the packet if this packet is a tagged packet 999 * but both pri and VID are 0. 1000 */ 1001 pri = VLAN_PRI(mhi.mhi_tci); 1002 if (mhi.mhi_istagged && (pri == 0) && (vid == VLAN_ID_NONE)) 1003 goto discard; 1004 1005 /* 1006 * Update the priority bits to the per-stream priority if 1007 * priority is not set in the packet. Update the VID for 1008 * packets on a VLAN stream. 1009 */ 1010 pri = (pri == 0) ? dsp->ds_pri : 0; 1011 if ((pri != 0) || (dsp->ds_vid != VLAN_ID_NONE)) { 1012 if ((newmp = i_dld_ether_header_update_tag(mp, 1013 pri, dsp->ds_vid)) == NULL) { 1014 goto discard; 1015 } 1016 mp = newmp; 1017 } 1018 } 1019 1020 dld_tx_single(dsp, mp); 1021 return; 1022 1023 discard: 1024 /* TODO: bump kstat? */ 1025 freemsg(mp); 1026 } 1027 1028 /* 1029 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 
 */
int
dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
{
	int		err;
	const char	*drvname;
	char		name[MAXNAMELEN];
	dls_channel_t	dc;
	uint_t		addr_length;

	/* A stream may only be attached once. */
	ASSERT(dsp->ds_dc == NULL);

	if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
		return (EINVAL);

	/* Build the data-link name (e.g. "bge0") from driver name and PPA. */
	(void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa);

	/*
	 * Associate the write-side queue with the device instance so the
	 * STREAMS framework can find the right dip.  "aggr" is special-cased:
	 * it has no per-PPA instance to associate with.
	 */
	if (strcmp(drvname, "aggr") != 0 &&
	    qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
		return (EINVAL);

	/*
	 * Open a channel.  On failure, undo the queue association above.
	 */
	if ((err = dls_open(name, &dc)) != 0) {
		(void) qassociate(dsp->ds_wq, -1);
		return (err);
	}

	/*
	 * Cache the MAC interface handle, a pointer to the immutable MAC
	 * information and the current and 'factory' MAC address.
	 */
	dsp->ds_mh = dls_mac(dc);
	dsp->ds_mip = mac_info(dsp->ds_mh);

	mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr);

	addr_length = dsp->ds_mip->mi_addr_length;
	bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length);

	/*
	 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for
	 * a non-VLAN interface).
	 */
	dsp->ds_vid = dls_vid(dc);

	/*
	 * Set the default packet priority.
	 */
	dsp->ds_pri = 0;

	/*
	 * Add a notify function so that the we get updates from the MAC.
	 */
	dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp);

	dsp->ds_ppa = ppa;
	dsp->ds_dc = dc;
	dsp->ds_dlstate = DL_UNBOUND;

	return (0);
}

/*
 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
 * from close(2) for style 2.  Undoes everything dld_str_attach() set up.
 */
void
dld_str_detach(dld_str_t *dsp)
{
	/* No other threads may be active in this stream at this point. */
	ASSERT(dsp->ds_thr == 0);

	/*
	 * Remove the notify function.
	 */
	mac_notify_remove(dsp->ds_mh, dsp->ds_mnh);

	/*
	 * Clear the polling and promisc flags.
	 */
	dsp->ds_polling = B_FALSE;
	dsp->ds_soft_ring = B_FALSE;
	dsp->ds_promisc = 0;

	/*
	 * Clear LSO flags.
	 */
	dsp->ds_lso = B_FALSE;
	dsp->ds_lso_max = 0;

	/*
	 * Close the channel.  The cached handles are invalid afterwards,
	 * so clear them to catch stale use.
	 */
	dls_close(dsp->ds_dc);
	dsp->ds_ppa = (t_uscalar_t)-1;
	dsp->ds_dc = NULL;
	dsp->ds_mh = NULL;

	/* Dissolve the queue/instance association made at attach time. */
	(void) qassociate(dsp->ds_wq, -1);

	/*
	 * Re-initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;

}

/*
 * This function is only called for VLAN streams. In raw mode, we strip VLAN
 * tags before sending packets up to the DLS clients, with the exception of
 * special priority tagged packets, in that case, we set the VID to 0.
 * mp must be a VLAN tagged packet.
 *
 * Returns the (possibly reallocated) message, or NULL on allocation
 * failure, in which case the caller still owns and must free mp.
 */
static mblk_t *
i_dld_ether_header_strip_tag(mblk_t *mp)
{
	mblk_t *newmp;
	struct ether_vlan_header *evhp;
	uint16_t tci, new_tci;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
	/*
	 * The header is about to be modified in place; take a private
	 * copy if anyone else holds a reference to this dblk.
	 */
	if (DB_REF(mp) > 1) {
		newmp = copymsg(mp);
		if (newmp == NULL)
			return (NULL);
		freemsg(mp);
		mp = newmp;
	}
	evhp = (struct ether_vlan_header *)mp->b_rptr;

	tci = ntohs(evhp->ether_tci);
	if (VLAN_PRI(tci) == 0) {
		/*
		 * Priority is 0, strip the tag: slide the two 6-byte MAC
		 * addresses forward over the 4-byte tag and advance b_rptr.
		 */
		ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
		mp->b_rptr += VLAN_TAGSZ;
	} else {
		/*
		 * Priority is not 0, update the VID to 0 but keep the
		 * priority-tagged header intact.
		 */
		new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
		evhp->ether_tci = htons(new_tci);
	}
	return (mp);
}

/*
 * Raw mode receive function.
1180 */ 1181 /*ARGSUSED*/ 1182 void 1183 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1184 mac_header_info_t *mhip) 1185 { 1186 dld_str_t *dsp = (dld_str_t *)arg; 1187 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 1188 mblk_t *next, *newmp; 1189 1190 ASSERT(mp != NULL); 1191 do { 1192 /* 1193 * Get the pointer to the next packet in the chain and then 1194 * clear b_next before the packet gets passed on. 1195 */ 1196 next = mp->b_next; 1197 mp->b_next = NULL; 1198 1199 /* 1200 * Wind back b_rptr to point at the MAC header. 1201 */ 1202 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1203 mp->b_rptr -= mhip->mhi_hdrsize; 1204 1205 /* 1206 * Certain MAC type plugins provide an illusion for raw 1207 * DLPI consumers. They pretend that the MAC layer is 1208 * something that it's not for the benefit of observability 1209 * tools. For example, mac_wifi pretends that it's Ethernet 1210 * for such consumers. Here, unless native mode is enabled, 1211 * we call into the MAC layer so that this illusion can be 1212 * maintained. The plugin will optionally transform the MAC 1213 * header here into something that can be passed up to raw 1214 * consumers. The header goes from "cooked" mode to raw mode. 1215 */ 1216 if (!dsp->ds_native) { 1217 newmp = mac_header_uncook(dsp->ds_mh, mp); 1218 if (newmp == NULL) { 1219 freemsg(mp); 1220 goto next; 1221 } 1222 mp = newmp; 1223 } 1224 1225 /* 1226 * Strip the VLAN tag for VLAN streams. 1227 */ 1228 if (is_ethernet && dsp->ds_vid != VLAN_ID_NONE) { 1229 newmp = i_dld_ether_header_strip_tag(mp); 1230 if (newmp == NULL) { 1231 freemsg(mp); 1232 goto next; 1233 } 1234 mp = newmp; 1235 } 1236 1237 /* 1238 * Pass the packet on. 1239 */ 1240 if (canputnext(dsp->ds_rq)) 1241 putnext(dsp->ds_rq, mp); 1242 else 1243 freemsg(mp); 1244 1245 next: 1246 /* 1247 * Move on to the next packet in the chain. 1248 */ 1249 mp = next; 1250 } while (mp != NULL); 1251 } 1252 1253 /* 1254 * Fast-path receive function. 
1255 */ 1256 /*ARGSUSED*/ 1257 void 1258 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1259 mac_header_info_t *mhip) 1260 { 1261 dld_str_t *dsp = (dld_str_t *)arg; 1262 mblk_t *next; 1263 size_t offset = 0; 1264 1265 /* 1266 * MAC header stripping rules: 1267 * - Tagged packets: 1268 * a. VLAN streams. Strip the whole VLAN header including the tag. 1269 * b. Physical streams 1270 * - VLAN packets (non-zero VID). The stream must be either a 1271 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener. 1272 * Strip the Ethernet header but keep the VLAN header. 1273 * - Special tagged packets (zero VID) 1274 * * The stream is either a DL_PROMISC_SAP listener or a 1275 * ETHERTYPE_VLAN listener, strip the Ethernet header but 1276 * keep the VLAN header. 1277 * * Otherwise, strip the whole VLAN header. 1278 * - Untagged packets. Strip the whole MAC header. 1279 */ 1280 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1281 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1282 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1283 offset = VLAN_TAGSZ; 1284 } 1285 1286 ASSERT(mp != NULL); 1287 do { 1288 /* 1289 * Get the pointer to the next packet in the chain and then 1290 * clear b_next before the packet gets passed on. 1291 */ 1292 next = mp->b_next; 1293 mp->b_next = NULL; 1294 1295 /* 1296 * Wind back b_rptr to point at the VLAN header. 1297 */ 1298 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset); 1299 mp->b_rptr -= offset; 1300 1301 /* 1302 * Pass the packet on. 1303 */ 1304 if (canputnext(dsp->ds_rq)) 1305 putnext(dsp->ds_rq, mp); 1306 else 1307 freemsg(mp); 1308 /* 1309 * Move on to the next packet in the chain. 1310 */ 1311 mp = next; 1312 } while (mp != NULL); 1313 } 1314 1315 /* 1316 * Default receive function (send DL_UNITDATA_IND messages). 
1317 */ 1318 /*ARGSUSED*/ 1319 void 1320 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1321 mac_header_info_t *mhip) 1322 { 1323 dld_str_t *dsp = (dld_str_t *)arg; 1324 mblk_t *ud_mp; 1325 mblk_t *next; 1326 size_t offset = 0; 1327 boolean_t strip_vlan = B_TRUE; 1328 1329 /* 1330 * See MAC header stripping rules in the dld_str_rx_fastpath() function. 1331 */ 1332 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1333 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1334 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1335 offset = VLAN_TAGSZ; 1336 strip_vlan = B_FALSE; 1337 } 1338 1339 ASSERT(mp != NULL); 1340 do { 1341 /* 1342 * Get the pointer to the next packet in the chain and then 1343 * clear b_next before the packet gets passed on. 1344 */ 1345 next = mp->b_next; 1346 mp->b_next = NULL; 1347 1348 /* 1349 * Wind back b_rptr to point at the MAC header. 1350 */ 1351 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1352 mp->b_rptr -= mhip->mhi_hdrsize; 1353 1354 /* 1355 * Create the DL_UNITDATA_IND M_PROTO. 1356 */ 1357 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) { 1358 freemsgchain(mp); 1359 return; 1360 } 1361 1362 /* 1363 * Advance b_rptr to point at the payload (or the VLAN header). 1364 */ 1365 mp->b_rptr += (mhip->mhi_hdrsize - offset); 1366 1367 /* 1368 * Prepend the DL_UNITDATA_IND. 1369 */ 1370 ud_mp->b_cont = mp; 1371 1372 /* 1373 * Send the message. 1374 */ 1375 if (canputnext(dsp->ds_rq)) 1376 putnext(dsp->ds_rq, ud_mp); 1377 else 1378 freemsg(ud_mp); 1379 1380 /* 1381 * Move on to the next packet in the chain. 1382 */ 1383 mp = next; 1384 } while (mp != NULL); 1385 } 1386 1387 /* 1388 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1389 * current state of the interface. 
 */
void
dld_str_notify_ind(dld_str_t *dsp)
{
	mac_notify_type_t	type;

	/* Replay every notification type so the consumer sees current state. */
	for (type = 0; type < MAC_NNOTE; type++)
		str_notify(dsp, type);
}

/*
 * Layout helper: reserves room for the DL_UNITDATA_IND primitive plus the
 * destination and source DLSAP addresses (MAC address + 16-bit SAP each).
 */
typedef struct dl_unitdata_ind_wrapper {
	dl_unitdata_ind_t	dl_unitdata;
	uint8_t			dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	uint8_t			dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)];
} dl_unitdata_ind_wrapper_t;

/*
 * Create a DL_UNITDATA_IND M_PROTO message.  Returns NULL if the packet
 * header cannot be parsed or the message cannot be allocated.
 */
static mblk_t *
str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan)
{
	mblk_t				*nmp;
	dl_unitdata_ind_wrapper_t	*dlwp;
	dl_unitdata_ind_t		*dlp;
	mac_header_info_t		mhi;
	uint_t				addr_length;
	uint8_t				*daddr;
	uint8_t				*saddr;

	/*
	 * Get the packet header information.
	 */
	if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0)
		return (NULL);

	/*
	 * Allocate a message large enough to contain the wrapper structure
	 * defined above.  mexchange() also fills in the primitive.
	 */
	if ((nmp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_unitdata_ind_wrapper_t), M_PROTO,
	    DL_UNITDATA_IND)) == NULL)
		return (NULL);

	dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr;

	dlp = &(dlwp->dl_unitdata);
	ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr);
	ASSERT(dlp->dl_primitive == DL_UNITDATA_IND);

	/*
	 * Copy in the destination address.
	 */
	addr_length = dsp->ds_mip->mi_addr_length;
	daddr = dlwp->dl_dest_addr;
	dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp;
	bcopy(mhi.mhi_daddr, daddr, addr_length);

	/*
	 * Set the destination DLSAP to the SAP value encoded in the packet.
	 * If the VLAN header is being preserved, advertise ETHERTYPE_VLAN
	 * instead of the inner SAP.
	 */
	if (mhi.mhi_istagged && !strip_vlan)
		*(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN;
	else
		*(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap;
	dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t);

	/*
	 * If the destination address was multicast or broadcast then the
	 * dl_group_address field should be non-zero.
	 */
	dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) ||
	    (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST);

	/*
	 * Copy in the source address if one exists. Some MAC types (DL_IB
	 * for example) may not have access to source information.
	 */
	if (mhi.mhi_saddr == NULL) {
		dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0;
	} else {
		saddr = dlwp->dl_src_addr;
		dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp;
		bcopy(mhi.mhi_saddr, saddr, addr_length);

		/*
		 * Set the source DLSAP to the packet ethertype.
		 */
		*(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap;
		dlp->dl_src_addr_length = addr_length + sizeof (uint16_t);
	}

	return (nmp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
 */
static void
str_notify_promisc_on_phys(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	/* Only send the indication if the consumer asked for it. */
	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
 */
static void
str_notify_promisc_off_phys(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	/* Only send the indication if the consumer asked for it. */
	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR
 */
static void
str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;
	uint_t		addr_length;
	uint16_t	ethertype;

	if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR))
		return;

	/*
	 * The address (plus a 16-bit SAP) is appended directly after the
	 * dl_notify_ind_t, so allocate room for both.
	 */
	addr_length = dsp->ds_mip->mi_addr_length;
	if ((mp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PHYS_ADDR;
	dlip->dl_data = DL_CURR_PHYS_ADDR;
	dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
	dlip->dl_addr_length = addr_length + sizeof (uint16_t);

	bcopy(addr, &dlip[1], addr_length);

	/* Non-ethertype SAPs (e.g. 802.2 LLC values) are reported as 0. */
	ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap;
	*(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) =
	    ethertype;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_LINK_UP
 */
static void
str_notify_link_up(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_LINK_UP))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_LINK_UP;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN
 */
static void
str_notify_link_down(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_LINK_DOWN;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_SPEED
 */
static void
str_notify_speed(dld_str_t *dsp, uint32_t speed)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_SPEED))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_SPEED;
	dlip->dl_data = speed;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG
 */
static void
str_notify_capab_reneg(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_CAPAB_RENEG;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH
 */
static void
str_notify_fastpath_flush(dld_str_t *dsp)
{
	mblk_t		*mp;
	dl_notify_ind_t	*dlip;

	if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH;

	qreply(dsp->ds_wq, mp);
}

/*
 * MAC notification callback.  Translates each MAC-layer event into the
 * corresponding DL_NOTIFY_IND (or queue action) for this stream.
 */
static void
str_notify(void *arg, mac_notify_type_t type)
{
	dld_str_t	*dsp = (dld_str_t *)arg;
	queue_t		*q = dsp->ds_wq;

	switch (type) {
	case MAC_NOTE_TX:
		/* Transmit resources are available again; restart service. */
		qenable(q);
		break;

	case MAC_NOTE_DEVPROMISC:
		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC))
			str_notify_promisc_on_phys(dsp);
		else
			str_notify_promisc_off_phys(dsp);
		break;

	case MAC_NOTE_PROMISC:
		break;

	case MAC_NOTE_UNICST:
		/*
		 * This notification is sent whenever the MAC unicast address
		 * changes. We need to re-cache the address.
		 */
		mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr);

		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_phys_addr(dsp, dsp->ds_curr_addr);
		break;

	case MAC_NOTE_LINK:
		/*
		 * This notification is sent every time the MAC driver
		 * updates the link state.
		 */
		switch (mac_link_get(dsp->ds_mh)) {
		case LINK_STATE_UP: {
			uint64_t speed;
			/*
			 * The link is up so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_up(dsp);

			/*
			 * Convert from bits/s to kbits/s (the unit
			 * DL_NOTE_SPEED reports).
			 */
			speed = mac_stat_get(dsp->ds_mh, MAC_STAT_IFSPEED);
			str_notify_speed(dsp, (uint32_t)(speed / 1000ull));
			break;
		}
		case LINK_STATE_DOWN:
			/*
			 * The link is down so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_down(dsp);
			break;

		default:
			break;
		}
		break;

	case MAC_NOTE_RESOURCE:
		/*
		 * This notification is sent whenever the MAC resources
		 * change. We need to renegotiate the capabilities.
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_capab_reneg(dsp);
		break;

	case MAC_NOTE_FASTPATH_FLUSH:
		str_notify_fastpath_flush(dsp);
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}
}

/*
 * Enqueue one or more messages to the transmit queue.
 * Caller specifies the insertion position (head/tail).
 */
void
dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert)
{
	mblk_t	*tail;
	queue_t	*q = dsp->ds_wq;
	uint_t	cnt, msgcnt;
	uint_t	tot_cnt, tot_msgcnt;

	ASSERT(DB_TYPE(mp) == M_DATA);
	/* Calculate total size and count of the packet(s) */
	for (tail = mp, cnt = msgdsize(mp), msgcnt = 1;
	    tail->b_next != NULL; tail = tail->b_next) {
		ASSERT(DB_TYPE(tail->b_next) == M_DATA);
		cnt += msgdsize(tail->b_next);
		msgcnt++;
	}

	mutex_enter(&dsp->ds_tx_list_lock);
	/*
	 * If tail insertion would push the queue depth past the allowed
	 * threshold, drop the new packet(s).  Head insertions (re-queued
	 * packets) are always accepted.
	 */
	tot_cnt = dsp->ds_tx_cnt + cnt;
	tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt;

	if (!head_insert &&
	    (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) {
		/* The queue can only overflow when already flow-controlled. */
		ASSERT(dsp->ds_tx_qbusy);
		mutex_exit(&dsp->ds_tx_list_lock);
		freemsgchain(mp);
		goto done;
	}

	/* Update the queue size parameters */
	dsp->ds_tx_cnt = tot_cnt;
	dsp->ds_tx_msgcnt = tot_msgcnt;

	/*
	 * If the stream is not already in "busy" mode, switch to it now:
	 * place the pre-allocated flow-control message on the write-side
	 * queue so the STREAMS framework sees the stream as full.
	 */
	if (!dsp->ds_tx_qbusy) {
		dsp->ds_tx_qbusy = B_TRUE;
		ASSERT(dsp->ds_tx_flow_mp != NULL);
		(void) putq(q, dsp->ds_tx_flow_mp);
		dsp->ds_tx_flow_mp = NULL;
	}

	if (!head_insert) {
		/* Tail insertion */
		if (dsp->ds_tx_list_head == NULL)
			dsp->ds_tx_list_head = mp;
		else
			dsp->ds_tx_list_tail->b_next = mp;
		dsp->ds_tx_list_tail = tail;
	} else {
		/* Head insertion */
		tail->b_next = dsp->ds_tx_list_head;
		if (dsp->ds_tx_list_head == NULL)
			dsp->ds_tx_list_tail = tail;
		dsp->ds_tx_list_head = mp;
	}
	mutex_exit(&dsp->ds_tx_list_lock);
done:
	/*
	 * Schedule service thread to drain the transmit queue.
	 * NOTE(review): head insertion appears to come from the drain path
	 * itself, which re-schedules as needed — confirm before relying on
	 * this asymmetry.
	 */
	if (!head_insert)
		qenable(q);
}

/*
 * Discard everything on the transmit queue and, if the stream was
 * flow-controlled, lift the flow-control condition.
 */
void
dld_tx_flush(dld_str_t *dsp)
{
	mutex_enter(&dsp->ds_tx_list_lock);
	if (dsp->ds_tx_list_head != NULL) {
		freemsgchain(dsp->ds_tx_list_head);
		dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL;
		dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0;
		if (dsp->ds_tx_qbusy) {
			/*
			 * Reclaim the flow-control message from the write
			 * queue so the stream is no longer marked full.
			 */
			dsp->ds_tx_flow_mp = getq(dsp->ds_wq);
			ASSERT(dsp->ds_tx_flow_mp != NULL);
			dsp->ds_tx_qbusy = B_FALSE;
		}
	}
	mutex_exit(&dsp->ds_tx_list_lock);
}

/*
 * Process an M_IOCTL message.
 */
static void
dld_ioc(dld_str_t *dsp, mblk_t *mp)
{
	uint_t	cmd;

	cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd;
	ASSERT(dsp->ds_type == DLD_DLPI);

	/* Dispatch the private DLD ioctls; everything else goes to the MAC. */
	switch (cmd) {
	case DLIOCNATIVE:
		ioc_native(dsp, mp);
		break;
	case DLIOCRAW:
		ioc_raw(dsp, mp);
		break;
	case DLIOCHDRINFO:
		ioc_fast(dsp, mp);
		break;
	default:
		ioc(dsp, mp);
	}
}

/*
 * DLIOCNATIVE
 */
static void
ioc_native(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;
	const mac_info_t *mip = dsp->ds_mip;

	rw_enter(&dsp->ds_lock, RW_WRITER);

	/*
	 * Native mode can be enabled if it's disabled and if the
	 * native media type is different.
	 */
	if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia)
		dsp->ds_native = B_TRUE;

	rw_exit(&dsp->ds_lock);

	/*
	 * NOTE(review): ds_native is read here after dropping ds_lock.
	 * This thread observes its own write above, but a concurrent
	 * DLIOCNATIVE could in principle change the value between the
	 * rw_exit() and this read — confirm whether that interleaving
	 * matters before tightening.
	 */
	if (dsp->ds_native)
		miocack(q, mp, 0, mip->mi_nativemedia);
	else
		miocnak(q, mp, 0, ENOTSUP);
}

/*
 * DLIOCRAW
 */
static void
ioc_raw(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;

	rw_enter(&dsp->ds_lock, RW_WRITER);
	/* Raw mode is mutually exclusive with polling and soft rings. */
	if (dsp->ds_polling || dsp->ds_soft_ring) {
		rw_exit(&dsp->ds_lock);
		miocnak(q, mp, 0, EPROTO);
		return;
	}

	if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) {
		/*
		 * Set the receive callback.
		 */
		dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp);
	}

	/*
	 * Note that raw mode is enabled.
	 */
	dsp->ds_mode = DLD_RAW;

	rw_exit(&dsp->ds_lock);
	miocack(q, mp, 0, 0);
}

/*
 * DLIOCHDRINFO
 */
static void
ioc_fast(dld_str_t *dsp, mblk_t *mp)
{
	dl_unitdata_req_t *dlp;
	off_t off;
	size_t len;
	const uint8_t *addr;
	uint16_t sap;
	mblk_t *nmp;
	mblk_t *hmp;
	uint_t addr_length;
	queue_t *q = dsp->ds_wq;
	int err;
	dls_channel_t dc;

	if (dld_opt & DLD_OPT_NO_FASTPATH) {
		err = ENOTSUP;
		goto failed;
	}

	/*
	 * DLIOCHDRINFO should only come from IP. The one initiated from
	 * user-land should not be allowed.
	 */
	if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) {
		err = EINVAL;
		goto failed;
	}

	/* The payload must be a well-formed DL_UNITDATA_REQ. */
	nmp = mp->b_cont;
	if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) ||
	    (dlp = (dl_unitdata_req_t *)nmp->b_rptr,
	    dlp->dl_primitive != DL_UNITDATA_REQ)) {
		err = EINVAL;
		goto failed;
	}

	off = dlp->dl_dest_addr_offset;
	len = dlp->dl_dest_addr_length;

	/* The destination address must lie wholly within the message. */
	if (!MBLKIN(nmp, off, len)) {
		err = EINVAL;
		goto failed;
	}

	rw_enter(&dsp->ds_lock, RW_READER);
	if (dsp->ds_dlstate != DL_IDLE) {
		rw_exit(&dsp->ds_lock);
		err = ENOTSUP;
		goto failed;
	}

	addr_length = dsp->ds_mip->mi_addr_length;
	if (len != addr_length + sizeof (uint16_t)) {
		rw_exit(&dsp->ds_lock);
		err = EINVAL;
		goto failed;
	}

	/* The DLSAP address is the MAC address followed by a 16-bit SAP. */
	addr = nmp->b_rptr + off;
	sap = *(uint16_t *)(nmp->b_rptr + off + addr_length);
	dc = dsp->ds_dc;

	if ((hmp = dls_header(dc, addr, sap, 0, NULL)) == NULL) {
		rw_exit(&dsp->ds_lock);
		err = ENOMEM;
		goto failed;
	}

	/*
	 * This is a performance optimization. We originally entered
	 * as reader and only become writer upon transitioning into
	 * the DLD_FASTPATH mode for the first time. Otherwise we
	 * stay as reader and return the fast-path header to IP.
	 */
	if (dsp->ds_mode != DLD_FASTPATH) {
		if (!rw_tryupgrade(&dsp->ds_lock)) {
			rw_exit(&dsp->ds_lock);
			rw_enter(&dsp->ds_lock, RW_WRITER);

			/*
			 * State may have changed before we re-acquired
			 * the writer lock in case the upgrade failed.
			 */
			if (dsp->ds_dlstate != DL_IDLE) {
				rw_exit(&dsp->ds_lock);
				err = ENOTSUP;
				goto failed;
			}
		}

		/*
		 * Set the receive callback (unless polling is enabled).
		 */
		if (!dsp->ds_polling && !dsp->ds_soft_ring)
			dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp);

		/*
		 * Note that fast-path mode is enabled.
		 */
		dsp->ds_mode = DLD_FASTPATH;
	}
	rw_exit(&dsp->ds_lock);

	/* Replace the original payload remainder with the built header. */
	freemsg(nmp->b_cont);
	nmp->b_cont = hmp;

	miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0);
	return;
failed:
	miocnak(q, mp, 0, err);
}

/*
 * Catch-all handler: forward any unrecognized ioctl to the MAC driver.
 */
static void
ioc(dld_str_t *dsp, mblk_t *mp)
{
	queue_t	*q = dsp->ds_wq;
	mac_handle_t mh;

	rw_enter(&dsp->ds_lock, RW_READER);
	if (dsp->ds_dlstate == DL_UNATTACHED) {
		rw_exit(&dsp->ds_lock);
		miocnak(q, mp, 0, EINVAL);
		return;
	}
	mh = dsp->ds_mh;
	ASSERT(mh != NULL);
	rw_exit(&dsp->ds_lock);
	/* mac_ioctl() may block, so it is called without ds_lock held. */
	mac_ioctl(mh, q, mp);
}

/*
 * Allocate a new minor number.  Returns 0 on failure (0 is never a valid
 * minor since the arena hands out values starting at 1).
 */
static minor_t
dld_minor_hold(boolean_t sleep)
{
	minor_t		minor;

	/*
	 * Grab a value from the arena.  The count is bumped first and
	 * rolled back on failure.
	 */
	atomic_add_32(&minor_count, 1);
	if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1,
	    (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) {
		atomic_add_32(&minor_count, -1);
		return (0);
	}

	return (minor);
}

/*
 * Release a previously allocated minor number.
 */
static void
dld_minor_rele(minor_t minor)
{
	/*
	 * Return the value to the arena and drop the outstanding-minor
	 * count taken in dld_minor_hold().
	 */
	vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1);

	atomic_add_32(&minor_count, -1);
}