1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Data-Link Driver 28 */ 29 30 #include <inet/common.h> 31 #include <sys/strsubr.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/vlan.h> 35 #include <sys/dld_impl.h> 36 #include <sys/cpuvar.h> 37 #include <sys/callb.h> 38 #include <sys/list.h> 39 #include <sys/mac_client.h> 40 #include <sys/mac_client_priv.h> 41 42 static int str_constructor(void *, void *, int); 43 static void str_destructor(void *, void *); 44 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); 45 static void str_notify_promisc_on_phys(dld_str_t *); 46 static void str_notify_promisc_off_phys(dld_str_t *); 47 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 48 static void str_notify_link_up(dld_str_t *); 49 static void str_notify_link_down(dld_str_t *); 50 static void str_notify_capab_reneg(dld_str_t *); 51 static void str_notify_speed(dld_str_t *, uint32_t); 52 53 static void ioc_native(dld_str_t *, mblk_t *); 54 static void ioc_margin(dld_str_t *, mblk_t *); 55 static void ioc_raw(dld_str_t *, mblk_t *); 56 static void ioc_fast(dld_str_t *, mblk_t *); 57 static void ioc_lowlink(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static void dld_wput_nondata(dld_str_t *, mblk_t *); 61 62 static void str_mdata_raw_put(dld_str_t *, mblk_t *); 63 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t, 64 link_tagmode_t); 65 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *, boolean_t); 66 67 static uint32_t str_count; 68 static kmem_cache_t *str_cachep; 69 static mod_hash_t *str_hashp; 70 71 #define STR_HASHSZ 64 72 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 73 74 #define dld_taskq system_taskq 75 76 static kmutex_t dld_taskq_lock; 77 static kcondvar_t dld_taskq_cv; 78 static list_t dld_taskq_list; /* List of dld_str_t */ 79 boolean_t dld_taskq_quit; 80 boolean_t dld_taskq_done; 81 82 static void dld_taskq_dispatch(void); 83 84 /* 85 * Some notes on entry points, flow-control, queueing. 86 * 87 * This driver exports the traditional STREAMS put entry point as well as 88 * the non-STREAMS fast-path transmit routine which is provided to IP via 89 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 90 * and data operations, while the fast-path routine deals only with M_DATA 91 * fast-path packets. Regardless of the entry point, all outbound packets 92 * will end up in DLD_TX(), where they will be delivered to the MAC layer. 93 * 94 * The transmit logic operates in the following way: All packets coming 95 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control 96 * happens when the MAC layer indicates the packets couldn't be 97 * transmitted due to 1) lack of resources (e.g. running out of 98 * descriptors), or 2) reaching the allowed bandwidth limit for this 99 * particular flow. The indication comes in the form of a Tx cookie that 100 * identifies the blocked ring. In such case, DLD will place a 101 * dummy message on its write-side STREAMS queue so that the queue is 102 * marked as "full". Any subsequent packets arriving at the driver will 103 * still be sent to the MAC layer where it either gets queued in the Tx 104 * SRS or discarded it if queue limit is exceeded. The write-side STREAMS 105 * queue gets enabled when MAC layer notifies DLD through MAC_NOTE_TX. 106 * When the write service procedure runs, it will remove the dummy 107 * message from the write-side STREAMS queue; in effect this will trigger 108 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0, 109 * respectively, due to the above reasons. 110 * 111 * All non-data operations, both DLPI and ioctls are single threaded on a per 112 * dld_str_t endpoint. This is done using a taskq so that the control operation 113 * has kernel context and can cv_wait for resources. In addition all set type 114 * operations that involve mac level state modification are serialized on a 115 * per mac end point using the perimeter mechanism provided by the mac layer. 116 * This serializes all mac clients trying to modify a single mac end point over 117 * the entire sequence of mac calls made by that client as an atomic unit. The 118 * mac framework locking is described in mac.c. A critical element is that 119 * DLD/DLS does not hold any locks across the mac perimeter. 120 * 121 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular 122 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that 123 * match dev_t. If a stream is found and it is attached, its dev_info_t * 124 * is returned. If the mac handle is non-null, it can be safely accessed 125 * below. The mac handle won't be freed until the mac_unregister which 126 * won't happen until the driver detaches. The DDI framework ensures that 127 * the detach won't happen while a getinfo is in progress. 128 */ 129 typedef struct i_dld_str_state_s { 130 major_t ds_major; 131 minor_t ds_minor; 132 dev_info_t *ds_dip; 133 } i_dld_str_state_t; 134 135 /* ARGSUSED */ 136 static uint_t 137 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 138 { 139 i_dld_str_state_t *statep = arg; 140 dld_str_t *dsp = (dld_str_t *)val; 141 mac_handle_t mh; 142 143 if (statep->ds_major != dsp->ds_major) 144 return (MH_WALK_CONTINUE); 145 146 ASSERT(statep->ds_minor != 0); 147 mh = dsp->ds_mh; 148 149 if (statep->ds_minor == dsp->ds_minor) { 150 /* 151 * Clone: a clone minor is unique. we can terminate the 152 * walk if we find a matching stream -- even if we fail 153 * to obtain the devinfo. 154 */ 155 if (mh != NULL) 156 statep->ds_dip = mac_devinfo_get(mh); 157 return (MH_WALK_TERMINATE); 158 } 159 return (MH_WALK_CONTINUE); 160 } 161 162 static dev_info_t * 163 dld_finddevinfo(dev_t dev) 164 { 165 dev_info_t *dip; 166 i_dld_str_state_t state; 167 168 if (getminor(dev) == 0) 169 return (NULL); 170 171 /* 172 * See if it's a minor node of a link 173 */ 174 if ((dip = dls_link_devinfo(dev)) != NULL) 175 return (dip); 176 177 state.ds_minor = getminor(dev); 178 state.ds_major = getmajor(dev); 179 state.ds_dip = NULL; 180 181 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 182 return (state.ds_dip); 183 } 184 185 /* 186 * devo_getinfo: getinfo(9e) 187 */ 188 /*ARGSUSED*/ 189 int 190 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 191 { 192 dev_info_t *devinfo; 193 minor_t minor = getminor((dev_t)arg); 194 int rc = DDI_FAILURE; 195 196 switch (cmd) { 197 case DDI_INFO_DEVT2DEVINFO: 198 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 199 *(dev_info_t **)resp = devinfo; 200 rc = DDI_SUCCESS; 201 } 202 break; 203 case DDI_INFO_DEVT2INSTANCE: 204 if (minor > 0 && minor <= DLS_MAX_MINOR) { 205 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor); 206 rc = DDI_SUCCESS; 207 } else if (minor > DLS_MAX_MINOR && 208 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 209 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); 210 rc = DDI_SUCCESS; 211 } 212 break; 213 } 214 return (rc); 215 } 216 217 void * 218 dld_str_private(queue_t *q) 219 { 220 return (((dld_str_t *)(q->q_ptr))->ds_private); 221 } 222 223 int 224 dld_str_open(queue_t *rq, dev_t *devp, void *private) 225 { 226 dld_str_t *dsp; 227 major_t major; 228 minor_t minor; 229 int err; 230 231 major = getmajor(*devp); 232 minor = getminor(*devp); 233 234 /* 235 * Create a new dld_str_t for the stream. This will grab a new minor 236 * number that will be handed back in the cloned dev_t. Creation may 237 * fail if we can't allocate the dummy mblk used for flow-control. 238 */ 239 dsp = dld_str_create(rq, DLD_DLPI, major, 240 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 241 if (dsp == NULL) 242 return (ENOSR); 243 244 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 245 dsp->ds_private = private; 246 if (minor != 0) { 247 /* 248 * Style 1 open 249 */ 250 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 251 goto failed; 252 253 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 254 } else { 255 (void) qassociate(rq, -1); 256 } 257 258 /* 259 * Enable the queue srv(9e) routine. 260 */ 261 qprocson(rq); 262 263 /* 264 * Construct a cloned dev_t to hand back. 265 */ 266 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 267 return (0); 268 269 failed: 270 dld_str_destroy(dsp); 271 return (err); 272 } 273 274 int 275 dld_str_close(queue_t *rq) 276 { 277 dld_str_t *dsp = rq->q_ptr; 278 279 /* 280 * All modules on top have been popped off. So there can't be any 281 * threads from the top. 282 */ 283 ASSERT(dsp->ds_datathr_cnt == 0); 284 285 /* 286 * Wait until pending DLPI requests are processed. 287 */ 288 mutex_enter(&dsp->ds_lock); 289 while (dsp->ds_dlpi_pending) 290 cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock); 291 mutex_exit(&dsp->ds_lock); 292 293 294 /* 295 * This stream was open to a provider node. Check to see 296 * if it has been cleanly shut down. 297 */ 298 if (dsp->ds_dlstate != DL_UNATTACHED) { 299 /* 300 * The stream is either open to a style 1 provider or 301 * this is not clean shutdown. Detach from the PPA. 302 * (This is still ok even in the style 1 case). 303 */ 304 dld_str_detach(dsp); 305 } 306 307 dld_str_destroy(dsp); 308 return (0); 309 } 310 311 /* 312 * qi_qopen: open(9e) 313 */ 314 /*ARGSUSED*/ 315 int 316 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 317 { 318 if (sflag == MODOPEN) 319 return (ENOTSUP); 320 321 /* 322 * This is a cloning driver and therefore each queue should only 323 * ever get opened once. 324 */ 325 if (rq->q_ptr != NULL) 326 return (EBUSY); 327 328 return (dld_str_open(rq, devp, NULL)); 329 } 330 331 /* 332 * qi_qclose: close(9e) 333 */ 334 int 335 dld_close(queue_t *rq) 336 { 337 /* 338 * Disable the queue srv(9e) routine. 339 */ 340 qprocsoff(rq); 341 342 return (dld_str_close(rq)); 343 } 344 345 /* 346 * qi_qputp: put(9e) 347 */ 348 void 349 dld_wput(queue_t *wq, mblk_t *mp) 350 { 351 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 352 dld_str_mode_t mode; 353 354 switch (DB_TYPE(mp)) { 355 case M_DATA: 356 mutex_enter(&dsp->ds_lock); 357 mode = dsp->ds_mode; 358 if ((dsp->ds_dlstate != DL_IDLE) || 359 (mode != DLD_FASTPATH && mode != DLD_RAW)) { 360 mutex_exit(&dsp->ds_lock); 361 freemsg(mp); 362 break; 363 } 364 365 DLD_DATATHR_INC(dsp); 366 mutex_exit(&dsp->ds_lock); 367 if (mode == DLD_FASTPATH) { 368 if (dsp->ds_mip->mi_media == DL_ETHER && 369 (MBLKL(mp) < sizeof (struct ether_header))) { 370 freemsg(mp); 371 } else { 372 (void) str_mdata_fastpath_put(dsp, mp, 0, 0); 373 } 374 } else { 375 str_mdata_raw_put(dsp, mp); 376 } 377 DLD_DATATHR_DCR(dsp); 378 break; 379 case M_PROTO: 380 case M_PCPROTO: { 381 t_uscalar_t prim; 382 383 if (MBLKL(mp) < sizeof (t_uscalar_t)) 384 break; 385 386 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; 387 388 if (prim == DL_UNITDATA_REQ) { 389 proto_unitdata_req(dsp, mp); 390 } else { 391 dld_wput_nondata(dsp, mp); 392 } 393 break; 394 } 395 396 case M_IOCTL: 397 dld_wput_nondata(dsp, mp); 398 break; 399 400 case M_FLUSH: 401 if (*mp->b_rptr & FLUSHW) { 402 DLD_CLRQFULL(dsp); 403 *mp->b_rptr &= ~FLUSHW; 404 } 405 406 if (*mp->b_rptr & FLUSHR) { 407 qreply(wq, mp); 408 } else { 409 freemsg(mp); 410 } 411 break; 412 413 default: 414 freemsg(mp); 415 break; 416 } 417 } 418 419 /* 420 * qi_srvp: srv(9e) 421 */ 422 void 423 dld_wsrv(queue_t *wq) 424 { 425 dld_str_t *dsp = wq->q_ptr; 426 427 DLD_CLRQFULL(dsp); 428 } 429 430 void 431 dld_init_ops(struct dev_ops *ops, const char *name) 432 { 433 struct streamtab *stream; 434 struct qinit *rq, *wq; 435 struct module_info *modinfo; 436 437 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 438 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 439 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 440 modinfo->mi_minpsz = 0; 441 modinfo->mi_maxpsz = 64*1024; 442 modinfo->mi_hiwat = 1; 443 modinfo->mi_lowat = 0; 444 445 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 446 rq->qi_qopen = dld_open; 447 rq->qi_qclose = dld_close; 448 rq->qi_minfo = modinfo; 449 450 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 451 wq->qi_putp = (pfi_t)dld_wput; 452 wq->qi_srvp = (pfi_t)dld_wsrv; 453 wq->qi_minfo = modinfo; 454 455 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 456 stream->st_rdinit = rq; 457 stream->st_wrinit = wq; 458 ops->devo_cb_ops->cb_str = stream; 459 460 if (ops->devo_getinfo == NULL) 461 ops->devo_getinfo = &dld_getinfo; 462 } 463 464 void 465 dld_fini_ops(struct dev_ops *ops) 466 { 467 struct streamtab *stream; 468 struct qinit *rq, *wq; 469 struct module_info *modinfo; 470 471 stream = ops->devo_cb_ops->cb_str; 472 rq = stream->st_rdinit; 473 wq = stream->st_wrinit; 474 modinfo = rq->qi_minfo; 475 ASSERT(wq->qi_minfo == modinfo); 476 477 kmem_free(stream, sizeof (struct streamtab)); 478 kmem_free(wq, sizeof (struct qinit)); 479 kmem_free(rq, sizeof (struct qinit)); 480 kmem_free(modinfo->mi_idname, FMNAMESZ); 481 kmem_free(modinfo, sizeof (struct module_info)); 482 } 483 484 /* 485 * Initialize this module's data structures. 486 */ 487 void 488 dld_str_init(void) 489 { 490 /* 491 * Create dld_str_t object cache. 492 */ 493 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 494 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 495 ASSERT(str_cachep != NULL); 496 497 /* 498 * Create a hash table for maintaining dld_str_t's. 499 * The ds_minor field (the clone minor number) of a dld_str_t 500 * is used as a key for this hash table because this number is 501 * globally unique (allocated from "dls_minor_arena"). 502 */ 503 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 504 mod_hash_null_valdtor); 505 506 mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL); 507 cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL); 508 509 dld_taskq_quit = B_FALSE; 510 dld_taskq_done = B_FALSE; 511 list_create(&dld_taskq_list, sizeof (dld_str_t), 512 offsetof(dld_str_t, ds_tqlist)); 513 (void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0, 514 &p0, TS_RUN, minclsyspri); 515 } 516 517 /* 518 * Tear down this module's data structures. 519 */ 520 int 521 dld_str_fini(void) 522 { 523 /* 524 * Make sure that there are no objects in use. 525 */ 526 if (str_count != 0) 527 return (EBUSY); 528 529 /* 530 * Ask the dld_taskq thread to quit and wait for it to be done 531 */ 532 mutex_enter(&dld_taskq_lock); 533 dld_taskq_quit = B_TRUE; 534 cv_signal(&dld_taskq_cv); 535 while (!dld_taskq_done) 536 cv_wait(&dld_taskq_cv, &dld_taskq_lock); 537 mutex_exit(&dld_taskq_lock); 538 list_destroy(&dld_taskq_list); 539 /* 540 * Destroy object cache. 541 */ 542 kmem_cache_destroy(str_cachep); 543 mod_hash_destroy_idhash(str_hashp); 544 return (0); 545 } 546 547 /* 548 * Create a new dld_str_t object. 549 */ 550 dld_str_t * 551 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 552 { 553 dld_str_t *dsp; 554 int err; 555 556 /* 557 * Allocate an object from the cache. 558 */ 559 atomic_add_32(&str_count, 1); 560 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 561 562 /* 563 * Allocate the dummy mblk for flow-control. 564 */ 565 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 566 if (dsp->ds_tx_flow_mp == NULL) { 567 kmem_cache_free(str_cachep, dsp); 568 atomic_add_32(&str_count, -1); 569 return (NULL); 570 } 571 dsp->ds_type = type; 572 dsp->ds_major = major; 573 dsp->ds_style = style; 574 575 /* 576 * Initialize the queue pointers. 577 */ 578 ASSERT(RD(rq) == rq); 579 dsp->ds_rq = rq; 580 dsp->ds_wq = WR(rq); 581 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 582 583 /* 584 * We want explicit control over our write-side STREAMS queue 585 * where the dummy mblk gets added/removed for flow-control. 586 */ 587 noenable(WR(rq)); 588 589 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor), 590 (mod_hash_val_t)dsp); 591 ASSERT(err == 0); 592 return (dsp); 593 } 594 595 /* 596 * Destroy a dld_str_t object. 597 */ 598 void 599 dld_str_destroy(dld_str_t *dsp) 600 { 601 queue_t *rq; 602 queue_t *wq; 603 mod_hash_val_t val; 604 605 /* 606 * Clear the queue pointers. 607 */ 608 rq = dsp->ds_rq; 609 wq = dsp->ds_wq; 610 ASSERT(wq == WR(rq)); 611 rq->q_ptr = wq->q_ptr = NULL; 612 dsp->ds_rq = dsp->ds_wq = NULL; 613 614 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 615 ASSERT(dsp->ds_sap == 0); 616 ASSERT(dsp->ds_mh == NULL); 617 ASSERT(dsp->ds_mch == NULL); 618 ASSERT(dsp->ds_promisc == 0); 619 ASSERT(dsp->ds_mph == NULL); 620 ASSERT(dsp->ds_mip == NULL); 621 ASSERT(dsp->ds_mnh == NULL); 622 623 ASSERT(dsp->ds_polling == B_FALSE); 624 ASSERT(dsp->ds_direct == B_FALSE); 625 ASSERT(dsp->ds_lso == B_FALSE); 626 ASSERT(dsp->ds_lso_max == 0); 627 ASSERT(dsp->ds_passivestate != DLD_ACTIVE); 628 629 /* 630 * Reinitialize all the flags. 631 */ 632 dsp->ds_notifications = 0; 633 dsp->ds_passivestate = DLD_UNINITIALIZED; 634 dsp->ds_mode = DLD_UNITDATA; 635 dsp->ds_native = B_FALSE; 636 637 ASSERT(dsp->ds_datathr_cnt == 0); 638 ASSERT(dsp->ds_pending_head == NULL); 639 ASSERT(dsp->ds_pending_tail == NULL); 640 ASSERT(!dsp->ds_dlpi_pending); 641 642 ASSERT(dsp->ds_dlp == NULL); 643 ASSERT(dsp->ds_dmap == NULL); 644 ASSERT(dsp->ds_rx == NULL); 645 ASSERT(dsp->ds_rx_arg == NULL); 646 ASSERT(dsp->ds_next == NULL); 647 ASSERT(dsp->ds_head == NULL); 648 649 /* 650 * Free the dummy mblk if exists. 651 */ 652 if (dsp->ds_tx_flow_mp != NULL) { 653 freeb(dsp->ds_tx_flow_mp); 654 dsp->ds_tx_flow_mp = NULL; 655 } 656 657 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val); 658 ASSERT(dsp == (dld_str_t *)val); 659 660 /* 661 * Free the object back to the cache. 662 */ 663 kmem_cache_free(str_cachep, dsp); 664 atomic_add_32(&str_count, -1); 665 } 666 667 /* 668 * kmem_cache contructor function: see kmem_cache_create(9f). 669 */ 670 /*ARGSUSED*/ 671 static int 672 str_constructor(void *buf, void *cdrarg, int kmflags) 673 { 674 dld_str_t *dsp = buf; 675 676 bzero(buf, sizeof (dld_str_t)); 677 678 /* 679 * Allocate a new minor number. 680 */ 681 if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0) 682 return (-1); 683 684 /* 685 * Initialize the DLPI state machine. 686 */ 687 dsp->ds_dlstate = DL_UNATTACHED; 688 689 mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL); 690 cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL); 691 cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL); 692 693 return (0); 694 } 695 696 /* 697 * kmem_cache destructor function. 698 */ 699 /*ARGSUSED*/ 700 static void 701 str_destructor(void *buf, void *cdrarg) 702 { 703 dld_str_t *dsp = buf; 704 705 /* 706 * Release the minor number. 707 */ 708 mac_minor_rele(dsp->ds_minor); 709 710 ASSERT(dsp->ds_tx_flow_mp == NULL); 711 712 mutex_destroy(&dsp->ds_lock); 713 cv_destroy(&dsp->ds_datathr_cv); 714 cv_destroy(&dsp->ds_dlpi_pending_cv); 715 } 716 717 /* 718 * Update the priority bits and VID (may need to insert tag if mp points 719 * to an untagged packet. 720 * If vid is VLAN_ID_NONE, use the VID encoded in the packet. 721 */ 722 static mblk_t * 723 i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid, 724 link_tagmode_t tagmode) 725 { 726 mblk_t *hmp; 727 struct ether_vlan_header *evhp; 728 struct ether_header *ehp; 729 uint16_t old_tci = 0; 730 size_t len; 731 732 ASSERT(pri != 0 || vid != VLAN_ID_NONE); 733 734 evhp = (struct ether_vlan_header *)mp->b_rptr; 735 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 736 /* 737 * Tagged packet, update the priority bits. 738 */ 739 len = sizeof (struct ether_vlan_header); 740 741 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) { 742 /* 743 * In case some drivers only check the db_ref 744 * count of the first mblk, we pullup the 745 * message into a single mblk. 746 */ 747 hmp = msgpullup(mp, -1); 748 if ((hmp == NULL) || (MBLKL(hmp) < len)) { 749 freemsg(hmp); 750 return (NULL); 751 } else { 752 freemsg(mp); 753 mp = hmp; 754 } 755 } 756 757 evhp = (struct ether_vlan_header *)mp->b_rptr; 758 old_tci = ntohs(evhp->ether_tci); 759 } else { 760 /* 761 * Untagged packet. Two factors will cause us to insert a 762 * VLAN header: 763 * - This is a VLAN link (vid is specified) 764 * - The link supports user priority tagging and the priority 765 * is non-zero. 766 */ 767 if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY) 768 return (mp); 769 770 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 771 if (hmp == NULL) 772 return (NULL); 773 774 evhp = (struct ether_vlan_header *)hmp->b_rptr; 775 ehp = (struct ether_header *)mp->b_rptr; 776 777 /* 778 * Copy the MAC addresses and typelen 779 */ 780 bcopy(ehp, evhp, (ETHERADDRL * 2)); 781 evhp->ether_type = ehp->ether_type; 782 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 783 784 hmp->b_wptr += sizeof (struct ether_vlan_header); 785 mp->b_rptr += sizeof (struct ether_header); 786 787 /* 788 * Free the original message if it's now empty. Link the 789 * rest of the messages to the header message. 790 */ 791 if (MBLKL(mp) == 0) { 792 hmp->b_cont = mp->b_cont; 793 freeb(mp); 794 } else { 795 hmp->b_cont = mp; 796 } 797 mp = hmp; 798 } 799 800 if (pri == 0) 801 pri = VLAN_PRI(old_tci); 802 if (vid == VLAN_ID_NONE) 803 vid = VLAN_ID(old_tci); 804 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid)); 805 return (mp); 806 } 807 808 /* 809 * M_DATA put (IP fast-path mode) 810 */ 811 mac_tx_cookie_t 812 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint, 813 uint16_t flag) 814 { 815 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 816 mblk_t *newmp; 817 uint_t pri; 818 mac_tx_cookie_t cookie; 819 820 if (is_ethernet) { 821 /* 822 * Update the priority bits to the assigned priority. 823 */ 824 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp); 825 826 if (pri != 0) { 827 newmp = i_dld_ether_header_update_tag(mp, pri, 828 VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode); 829 if (newmp == NULL) 830 goto discard; 831 mp = newmp; 832 } 833 } 834 835 if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != NULL) { 836 DLD_SETQFULL(dsp); 837 } 838 return (cookie); 839 840 discard: 841 /* TODO: bump kstat? */ 842 freemsg(mp); 843 return (NULL); 844 } 845 846 /* 847 * M_DATA put (DLIOCRAW mode) 848 */ 849 static void 850 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 851 { 852 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 853 mblk_t *bp, *newmp; 854 size_t size; 855 mac_header_info_t mhi; 856 uint_t pri, vid, dvid; 857 uint_t max_sdu; 858 859 /* 860 * Certain MAC type plugins provide an illusion for raw DLPI 861 * consumers. They pretend that the MAC layer is something that 862 * it's not for the benefit of observability tools. For example, 863 * mac_wifi pretends that it's Ethernet for such consumers. 864 * Here, unless native mode is enabled, we call into the MAC layer so 865 * that this illusion can be maintained. The plugin will optionally 866 * transform the MAC header here into something that can be passed 867 * down. The header goes from raw mode to "cooked" mode. 868 */ 869 if (!dsp->ds_native) { 870 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL) 871 goto discard; 872 mp = newmp; 873 } 874 875 size = MBLKL(mp); 876 877 /* 878 * Check the packet is not too big and that any remaining 879 * fragment list is composed entirely of M_DATA messages. (We 880 * know the first fragment was M_DATA otherwise we could not 881 * have got here). 882 */ 883 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 884 if (DB_TYPE(bp) != M_DATA) 885 goto discard; 886 size += MBLKL(bp); 887 } 888 889 if (dls_link_header_info(dsp->ds_dlp, mp, &mhi) != 0) 890 goto discard; 891 892 mac_sdu_get(dsp->ds_mh, NULL, &max_sdu); 893 /* 894 * If LSO is enabled, check the size against lso_max. Otherwise, 895 * compare the packet size with max_sdu. 896 */ 897 max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu; 898 if (size > max_sdu + mhi.mhi_hdrsize) 899 goto discard; 900 901 if (is_ethernet) { 902 dvid = mac_client_vid(dsp->ds_mch); 903 904 /* 905 * Discard the packet if this is a VLAN stream but the VID in 906 * the packet is not correct. 907 */ 908 vid = VLAN_ID(mhi.mhi_tci); 909 if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE)) 910 goto discard; 911 912 /* 913 * Discard the packet if this packet is a tagged packet 914 * but both pri and VID are 0. 915 */ 916 pri = VLAN_PRI(mhi.mhi_tci); 917 if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 && 918 vid == VLAN_ID_NONE) 919 goto discard; 920 921 /* 922 * Update the priority bits to the per-stream priority if 923 * priority is not set in the packet. Update the VID for 924 * packets on a VLAN stream. 925 */ 926 pri = (pri == 0) ? dsp->ds_pri : 0; 927 if ((pri != 0) || (dvid != VLAN_ID_NONE)) { 928 if ((newmp = i_dld_ether_header_update_tag(mp, pri, 929 dvid, dsp->ds_dlp->dl_tagmode)) == NULL) { 930 goto discard; 931 } 932 mp = newmp; 933 } 934 } 935 936 if (DLD_TX(dsp, mp, 0, 0) != NULL) { 937 /* Turn on flow-control for dld */ 938 DLD_SETQFULL(dsp); 939 } 940 return; 941 942 discard: 943 /* TODO: bump kstat? */ 944 freemsg(mp); 945 } 946 947 /* 948 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 949 */ 950 int 951 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 952 { 953 dev_t dev; 954 int err; 955 const char *drvname; 956 mac_perim_handle_t mph = NULL; 957 boolean_t qassociated = B_FALSE; 958 dls_link_t *dlp = NULL; 959 dls_dl_handle_t ddp = NULL; 960 961 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 962 return (EINVAL); 963 964 if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA) 965 return (ENOTSUP); 966 967 /* 968 * /dev node access. This will still be supported for backward 969 * compatibility reason. 970 */ 971 if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) && 972 (strcmp(drvname, "vnic") != 0)) { 973 if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 974 return (EINVAL); 975 qassociated = B_TRUE; 976 } 977 978 dev = makedevice(dsp->ds_major, (minor_t)ppa + 1); 979 if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0) 980 goto failed; 981 982 if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0) 983 goto failed; 984 985 /* 986 * Open a channel. 987 */ 988 if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0) 989 goto failed; 990 991 if ((err = dls_open(dlp, ddp, dsp)) != 0) 992 goto failed; 993 994 /* 995 * Set the default packet priority. 996 */ 997 dsp->ds_pri = 0; 998 999 /* 1000 * Add a notify function so that the we get updates from the MAC. 1001 */ 1002 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp); 1003 dsp->ds_dlstate = DL_UNBOUND; 1004 mac_perim_exit(mph); 1005 return (0); 1006 1007 failed: 1008 if (dlp != NULL) 1009 dls_link_rele(dlp); 1010 if (mph != NULL) 1011 mac_perim_exit(mph); 1012 if (ddp != NULL) 1013 dls_devnet_rele(ddp); 1014 if (qassociated) 1015 (void) qassociate(dsp->ds_wq, -1); 1016 1017 return (err); 1018 } 1019 1020 /* 1021 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 1022 * from close(2) for style 2. 1023 */ 1024 void 1025 dld_str_detach(dld_str_t *dsp) 1026 { 1027 mac_perim_handle_t mph; 1028 int err; 1029 1030 ASSERT(dsp->ds_datathr_cnt == 0); 1031 1032 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 1033 /* 1034 * Remove the notify function. 1035 * 1036 * Note that we cannot wait for the notification callback to be removed 1037 * since it could cause the deadlock with str_notify() since they both 1038 * need the mac perimeter. Continue if we cannot remove the 1039 * notification callback right now and wait after we leave the 1040 * perimeter. 1041 */ 1042 err = mac_notify_remove(dsp->ds_mnh, B_FALSE); 1043 dsp->ds_mnh = NULL; 1044 1045 /* 1046 * Disable the capabilities 1047 */ 1048 dld_capabilities_disable(dsp); 1049 1050 /* 1051 * Clear LSO flags. 1052 */ 1053 dsp->ds_lso = B_FALSE; 1054 dsp->ds_lso_max = 0; 1055 1056 dls_close(dsp); 1057 mac_perim_exit(mph); 1058 1059 /* 1060 * Now we leave the mac perimeter. If mac_notify_remove() failed 1061 * because the notification callback was in progress, wait for 1062 * it to finish before we proceed. 1063 */ 1064 if (err != 0) 1065 mac_notify_remove_wait(dsp->ds_mh); 1066 1067 /* 1068 * An unreferenced tagged (non-persistent) vlan gets destroyed 1069 * automatically in the call to dls_devnet_rele. 1070 */ 1071 dls_devnet_rele(dsp->ds_ddh); 1072 1073 dsp->ds_sap = 0; 1074 dsp->ds_mh = NULL; 1075 dsp->ds_mch = NULL; 1076 dsp->ds_mip = NULL; 1077 1078 if (dsp->ds_style == DL_STYLE2) 1079 (void) qassociate(dsp->ds_wq, -1); 1080 1081 /* 1082 * Re-initialize the DLPI state machine. 1083 */ 1084 dsp->ds_dlstate = DL_UNATTACHED; 1085 } 1086 1087 /* 1088 * This function is only called for VLAN streams. In raw mode, we strip VLAN 1089 * tags before sending packets up to the DLS clients, with the exception of 1090 * special priority tagged packets, in that case, we set the VID to 0. 1091 * mp must be a VLAN tagged packet. 1092 */ 1093 static mblk_t * 1094 i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri) 1095 { 1096 mblk_t *newmp; 1097 struct ether_vlan_header *evhp; 1098 uint16_t tci, new_tci; 1099 1100 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 1101 if (DB_REF(mp) > 1) { 1102 newmp = copymsg(mp); 1103 if (newmp == NULL) 1104 return (NULL); 1105 freemsg(mp); 1106 mp = newmp; 1107 } 1108 evhp = (struct ether_vlan_header *)mp->b_rptr; 1109 1110 tci = ntohs(evhp->ether_tci); 1111 if (VLAN_PRI(tci) == 0 || !keep_pri) { 1112 /* 1113 * Priority is 0, strip the tag. 1114 */ 1115 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 1116 mp->b_rptr += VLAN_TAGSZ; 1117 } else { 1118 /* 1119 * Priority is not 0, update the VID to 0. 1120 */ 1121 new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE); 1122 evhp->ether_tci = htons(new_tci); 1123 } 1124 return (mp); 1125 } 1126 1127 /* 1128 * Raw mode receive function. 1129 */ 1130 /*ARGSUSED*/ 1131 void 1132 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1133 mac_header_info_t *mhip) 1134 { 1135 dld_str_t *dsp = (dld_str_t *)arg; 1136 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 1137 mblk_t *next, *newmp; 1138 1139 ASSERT(mp != NULL); 1140 do { 1141 /* 1142 * Get the pointer to the next packet in the chain and then 1143 * clear b_next before the packet gets passed on. 1144 */ 1145 next = mp->b_next; 1146 mp->b_next = NULL; 1147 1148 /* 1149 * Wind back b_rptr to point at the MAC header. 1150 */ 1151 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1152 mp->b_rptr -= mhip->mhi_hdrsize; 1153 1154 /* 1155 * Certain MAC type plugins provide an illusion for raw 1156 * DLPI consumers. They pretend that the MAC layer is 1157 * something that it's not for the benefit of observability 1158 * tools. For example, mac_wifi pretends that it's Ethernet 1159 * for such consumers. Here, unless native mode is enabled, 1160 * we call into the MAC layer so that this illusion can be 1161 * maintained. The plugin will optionally transform the MAC 1162 * header here into something that can be passed up to raw 1163 * consumers. The header goes from "cooked" mode to raw mode. 1164 */ 1165 if (!dsp->ds_native) { 1166 newmp = mac_header_uncook(dsp->ds_mh, mp); 1167 if (newmp == NULL) { 1168 freemsg(mp); 1169 goto next; 1170 } 1171 mp = newmp; 1172 } 1173 1174 /* 1175 * Strip the VLAN tag for VLAN streams. 1176 */ 1177 if (is_ethernet && 1178 mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) { 1179 /* 1180 * The priority should be kept only for VLAN 1181 * data-links. 1182 */ 1183 newmp = i_dld_ether_header_strip_tag(mp, 1184 mac_client_is_vlan_vnic(dsp->ds_mch)); 1185 if (newmp == NULL) { 1186 freemsg(mp); 1187 goto next; 1188 } 1189 mp = newmp; 1190 } 1191 1192 /* 1193 * Pass the packet on. 1194 */ 1195 if (canputnext(dsp->ds_rq)) 1196 putnext(dsp->ds_rq, mp); 1197 else 1198 freemsg(mp); 1199 1200 next: 1201 /* 1202 * Move on to the next packet in the chain. 1203 */ 1204 mp = next; 1205 } while (mp != NULL); 1206 } 1207 1208 /* 1209 * Fast-path receive function. 1210 */ 1211 /*ARGSUSED*/ 1212 void 1213 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1214 mac_header_info_t *mhip) 1215 { 1216 dld_str_t *dsp = (dld_str_t *)arg; 1217 mblk_t *next; 1218 size_t offset = 0; 1219 1220 /* 1221 * MAC header stripping rules: 1222 * - Tagged packets: 1223 * a. VLAN streams. Strip the whole VLAN header including the tag. 1224 * b. Physical streams 1225 * - VLAN packets (non-zero VID). The stream must be either a 1226 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener. 1227 * Strip the Ethernet header but keep the VLAN header. 1228 * - Special tagged packets (zero VID) 1229 * * The stream is either a DL_PROMISC_SAP listener or a 1230 * ETHERTYPE_VLAN listener, strip the Ethernet header but 1231 * keep the VLAN header. 1232 * * Otherwise, strip the whole VLAN header. 1233 * - Untagged packets. Strip the whole MAC header. 1234 */ 1235 if (mhip->mhi_istagged && 1236 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) && 1237 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1238 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1239 offset = VLAN_TAGSZ; 1240 } 1241 1242 ASSERT(mp != NULL); 1243 do { 1244 /* 1245 * Get the pointer to the next packet in the chain and then 1246 * clear b_next before the packet gets passed on. 1247 */ 1248 next = mp->b_next; 1249 mp->b_next = NULL; 1250 1251 /* 1252 * Wind back b_rptr to point at the VLAN header. 1253 */ 1254 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset); 1255 mp->b_rptr -= offset; 1256 1257 /* 1258 * Pass the packet on. 1259 */ 1260 if (canputnext(dsp->ds_rq)) 1261 putnext(dsp->ds_rq, mp); 1262 else 1263 freemsg(mp); 1264 /* 1265 * Move on to the next packet in the chain. 1266 */ 1267 mp = next; 1268 } while (mp != NULL); 1269 } 1270 1271 /* 1272 * Default receive function (send DL_UNITDATA_IND messages). 1273 */ 1274 /*ARGSUSED*/ 1275 void 1276 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1277 mac_header_info_t *mhip) 1278 { 1279 dld_str_t *dsp = (dld_str_t *)arg; 1280 mblk_t *ud_mp; 1281 mblk_t *next; 1282 size_t offset = 0; 1283 boolean_t strip_vlan = B_TRUE; 1284 1285 /* 1286 * See MAC header stripping rules in the dld_str_rx_fastpath() function. 1287 */ 1288 if (mhip->mhi_istagged && 1289 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) && 1290 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1291 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1292 offset = VLAN_TAGSZ; 1293 strip_vlan = B_FALSE; 1294 } 1295 1296 ASSERT(mp != NULL); 1297 do { 1298 /* 1299 * Get the pointer to the next packet in the chain and then 1300 * clear b_next before the packet gets passed on. 1301 */ 1302 next = mp->b_next; 1303 mp->b_next = NULL; 1304 1305 /* 1306 * Wind back b_rptr to point at the MAC header. 1307 */ 1308 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1309 mp->b_rptr -= mhip->mhi_hdrsize; 1310 1311 /* 1312 * Create the DL_UNITDATA_IND M_PROTO. 1313 */ 1314 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) { 1315 freemsgchain(mp); 1316 return; 1317 } 1318 1319 /* 1320 * Advance b_rptr to point at the payload (or the VLAN header). 1321 */ 1322 mp->b_rptr += (mhip->mhi_hdrsize - offset); 1323 1324 /* 1325 * Prepend the DL_UNITDATA_IND. 1326 */ 1327 ud_mp->b_cont = mp; 1328 1329 /* 1330 * Send the message. 1331 */ 1332 if (canputnext(dsp->ds_rq)) 1333 putnext(dsp->ds_rq, ud_mp); 1334 else 1335 freemsg(ud_mp); 1336 1337 /* 1338 * Move on to the next packet in the chain. 1339 */ 1340 mp = next; 1341 } while (mp != NULL); 1342 } 1343 1344 /* 1345 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE 1346 */ 1347 static void 1348 str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu) 1349 { 1350 mblk_t *mp; 1351 dl_notify_ind_t *dlip; 1352 1353 if (!(dsp->ds_notifications & DL_NOTE_SDU_SIZE)) 1354 return; 1355 1356 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1357 M_PROTO, 0)) == NULL) 1358 return; 1359 1360 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1361 dlip = (dl_notify_ind_t *)mp->b_rptr; 1362 dlip->dl_primitive = DL_NOTIFY_IND; 1363 dlip->dl_notification = DL_NOTE_SDU_SIZE; 1364 dlip->dl_data = max_sdu; 1365 1366 qreply(dsp->ds_wq, mp); 1367 } 1368 1369 /* 1370 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1371 * current state of the interface. 1372 */ 1373 void 1374 dld_str_notify_ind(dld_str_t *dsp) 1375 { 1376 mac_notify_type_t type; 1377 1378 for (type = 0; type < MAC_NNOTE; type++) 1379 str_notify(dsp, type); 1380 } 1381 1382 typedef struct dl_unitdata_ind_wrapper { 1383 dl_unitdata_ind_t dl_unitdata; 1384 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1385 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1386 } dl_unitdata_ind_wrapper_t; 1387 1388 /* 1389 * Create a DL_UNITDATA_IND M_PROTO message. 1390 */ 1391 static mblk_t * 1392 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan) 1393 { 1394 mblk_t *nmp; 1395 dl_unitdata_ind_wrapper_t *dlwp; 1396 dl_unitdata_ind_t *dlp; 1397 mac_header_info_t mhi; 1398 uint_t addr_length; 1399 uint8_t *daddr; 1400 uint8_t *saddr; 1401 1402 /* 1403 * Get the packet header information. 1404 */ 1405 if (dls_link_header_info(dsp->ds_dlp, mp, &mhi) != 0) 1406 return (NULL); 1407 1408 /* 1409 * Allocate a message large enough to contain the wrapper structure 1410 * defined above. 1411 */ 1412 if ((nmp = mexchange(dsp->ds_wq, NULL, 1413 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1414 DL_UNITDATA_IND)) == NULL) 1415 return (NULL); 1416 1417 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1418 1419 dlp = &(dlwp->dl_unitdata); 1420 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1421 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1422 1423 /* 1424 * Copy in the destination address. 1425 */ 1426 addr_length = dsp->ds_mip->mi_addr_length; 1427 daddr = dlwp->dl_dest_addr; 1428 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1429 bcopy(mhi.mhi_daddr, daddr, addr_length); 1430 1431 /* 1432 * Set the destination DLSAP to the SAP value encoded in the packet. 1433 */ 1434 if (mhi.mhi_istagged && !strip_vlan) 1435 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN; 1436 else 1437 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap; 1438 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1439 1440 /* 1441 * If the destination address was multicast or broadcast then the 1442 * dl_group_address field should be non-zero. 1443 */ 1444 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) || 1445 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST); 1446 1447 /* 1448 * Copy in the source address if one exists. Some MAC types (DL_IB 1449 * for example) may not have access to source information. 1450 */ 1451 if (mhi.mhi_saddr == NULL) { 1452 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0; 1453 } else { 1454 saddr = dlwp->dl_src_addr; 1455 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1456 bcopy(mhi.mhi_saddr, saddr, addr_length); 1457 1458 /* 1459 * Set the source DLSAP to the packet ethertype. 1460 */ 1461 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap; 1462 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1463 } 1464 1465 return (nmp); 1466 } 1467 1468 /* 1469 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1470 */ 1471 static void 1472 str_notify_promisc_on_phys(dld_str_t *dsp) 1473 { 1474 mblk_t *mp; 1475 dl_notify_ind_t *dlip; 1476 1477 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1478 return; 1479 1480 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1481 M_PROTO, 0)) == NULL) 1482 return; 1483 1484 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1485 dlip = (dl_notify_ind_t *)mp->b_rptr; 1486 dlip->dl_primitive = DL_NOTIFY_IND; 1487 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1488 1489 qreply(dsp->ds_wq, mp); 1490 } 1491 1492 /* 1493 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1494 */ 1495 static void 1496 str_notify_promisc_off_phys(dld_str_t *dsp) 1497 { 1498 mblk_t *mp; 1499 dl_notify_ind_t *dlip; 1500 1501 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1502 return; 1503 1504 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1505 M_PROTO, 0)) == NULL) 1506 return; 1507 1508 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1509 dlip = (dl_notify_ind_t *)mp->b_rptr; 1510 dlip->dl_primitive = DL_NOTIFY_IND; 1511 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1512 1513 qreply(dsp->ds_wq, mp); 1514 } 1515 1516 /* 1517 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1518 */ 1519 static void 1520 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1521 { 1522 mblk_t *mp; 1523 dl_notify_ind_t *dlip; 1524 uint_t addr_length; 1525 uint16_t ethertype; 1526 1527 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1528 return; 1529 1530 addr_length = dsp->ds_mip->mi_addr_length; 1531 if ((mp = mexchange(dsp->ds_wq, NULL, 1532 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1533 M_PROTO, 0)) == NULL) 1534 return; 1535 1536 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1537 dlip = (dl_notify_ind_t *)mp->b_rptr; 1538 dlip->dl_primitive = DL_NOTIFY_IND; 1539 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1540 dlip->dl_data = DL_CURR_PHYS_ADDR; 1541 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1542 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1543 1544 bcopy(addr, &dlip[1], addr_length); 1545 1546 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1547 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype; 1548 1549 qreply(dsp->ds_wq, mp); 1550 } 1551 1552 /* 1553 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1554 */ 1555 static void 1556 str_notify_link_up(dld_str_t *dsp) 1557 { 1558 mblk_t *mp; 1559 dl_notify_ind_t *dlip; 1560 1561 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1562 return; 1563 1564 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1565 M_PROTO, 0)) == NULL) 1566 return; 1567 1568 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1569 dlip = (dl_notify_ind_t *)mp->b_rptr; 1570 dlip->dl_primitive = DL_NOTIFY_IND; 1571 dlip->dl_notification = DL_NOTE_LINK_UP; 1572 1573 qreply(dsp->ds_wq, mp); 1574 } 1575 1576 /* 1577 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1578 */ 1579 static void 1580 str_notify_link_down(dld_str_t *dsp) 1581 { 1582 mblk_t *mp; 1583 dl_notify_ind_t *dlip; 1584 1585 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1586 return; 1587 1588 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1589 M_PROTO, 0)) == NULL) 1590 return; 1591 1592 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1593 dlip = (dl_notify_ind_t *)mp->b_rptr; 1594 dlip->dl_primitive = DL_NOTIFY_IND; 1595 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1596 1597 qreply(dsp->ds_wq, mp); 1598 } 1599 1600 /* 1601 * DL_NOTIFY_IND: DL_NOTE_SPEED 1602 */ 1603 static void 1604 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1605 { 1606 mblk_t *mp; 1607 dl_notify_ind_t *dlip; 1608 1609 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1610 return; 1611 1612 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1613 M_PROTO, 0)) == NULL) 1614 return; 1615 1616 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1617 dlip = (dl_notify_ind_t *)mp->b_rptr; 1618 dlip->dl_primitive = DL_NOTIFY_IND; 1619 dlip->dl_notification = DL_NOTE_SPEED; 1620 dlip->dl_data = speed; 1621 1622 qreply(dsp->ds_wq, mp); 1623 } 1624 1625 /* 1626 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1627 */ 1628 static void 1629 str_notify_capab_reneg(dld_str_t *dsp) 1630 { 1631 mblk_t *mp; 1632 dl_notify_ind_t *dlip; 1633 1634 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1635 return; 1636 1637 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1638 M_PROTO, 0)) == NULL) 1639 return; 1640 1641 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1642 dlip = (dl_notify_ind_t *)mp->b_rptr; 1643 dlip->dl_primitive = DL_NOTIFY_IND; 1644 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1645 1646 qreply(dsp->ds_wq, mp); 1647 } 1648 1649 /* 1650 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH 1651 */ 1652 static void 1653 str_notify_fastpath_flush(dld_str_t *dsp) 1654 { 1655 mblk_t *mp; 1656 dl_notify_ind_t *dlip; 1657 1658 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH)) 1659 return; 1660 1661 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1662 M_PROTO, 0)) == NULL) 1663 return; 1664 1665 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1666 dlip = (dl_notify_ind_t *)mp->b_rptr; 1667 dlip->dl_primitive = DL_NOTIFY_IND; 1668 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH; 1669 1670 qreply(dsp->ds_wq, mp); 1671 } 1672 1673 /* 1674 * MAC notification callback. 1675 */ 1676 void 1677 str_notify(void *arg, mac_notify_type_t type) 1678 { 1679 dld_str_t *dsp = (dld_str_t *)arg; 1680 queue_t *q = dsp->ds_wq; 1681 mac_handle_t mh = dsp->ds_mh; 1682 mac_client_handle_t mch = dsp->ds_mch; 1683 uint8_t addr[MAXMACADDRLEN]; 1684 1685 switch (type) { 1686 case MAC_NOTE_TX: 1687 qenable(q); 1688 break; 1689 1690 case MAC_NOTE_DEVPROMISC: 1691 /* 1692 * Send the appropriate DL_NOTIFY_IND. 1693 */ 1694 if (mac_promisc_get(mh)) 1695 str_notify_promisc_on_phys(dsp); 1696 else 1697 str_notify_promisc_off_phys(dsp); 1698 break; 1699 1700 case MAC_NOTE_UNICST: 1701 /* 1702 * This notification is sent whenever the MAC unicast 1703 * address changes. 1704 */ 1705 mac_unicast_primary_get(mh, addr); 1706 1707 /* 1708 * Send the appropriate DL_NOTIFY_IND. 1709 */ 1710 str_notify_phys_addr(dsp, addr); 1711 break; 1712 1713 case MAC_NOTE_LOWLINK: 1714 case MAC_NOTE_LINK: 1715 /* 1716 * LOWLINK refers to the actual link status. For links that 1717 * are not part of a bridge instance LOWLINK and LINK state 1718 * are the same. But for a link part of a bridge instance 1719 * LINK state refers to the aggregate link status: "up" when 1720 * at least one link part of the bridge is up and is "down" 1721 * when all links part of the bridge are down. 1722 * 1723 * Clients can request to be notified of the LOWLINK state 1724 * using the DLIOCLOWLINK ioctl. Clients such as the bridge 1725 * daemon request lowlink state changes and upper layer clients 1726 * receive notifications of the aggregate link state changes 1727 * which is the default when requesting LINK UP/DOWN state 1728 * notifications. 1729 */ 1730 1731 /* 1732 * Check that the notification type matches the one that we 1733 * want. If we want lower-level link notifications, and this 1734 * is upper, or if we want upper and this is lower, then 1735 * ignore. 1736 */ 1737 if ((type == MAC_NOTE_LOWLINK) != dsp->ds_lowlink) 1738 break; 1739 /* 1740 * This notification is sent every time the MAC driver 1741 * updates the link state. 1742 */ 1743 switch (mac_client_stat_get(mch, dsp->ds_lowlink ? 1744 MAC_STAT_LOWLINK_STATE : MAC_STAT_LINK_STATE)) { 1745 case LINK_STATE_UP: { 1746 uint64_t speed; 1747 /* 1748 * The link is up so send the appropriate 1749 * DL_NOTIFY_IND. 1750 */ 1751 str_notify_link_up(dsp); 1752 1753 speed = mac_stat_get(mh, MAC_STAT_IFSPEED); 1754 str_notify_speed(dsp, (uint32_t)(speed / 1000ull)); 1755 break; 1756 } 1757 case LINK_STATE_DOWN: 1758 /* 1759 * The link is down so send the appropriate 1760 * DL_NOTIFY_IND. 1761 */ 1762 str_notify_link_down(dsp); 1763 break; 1764 1765 default: 1766 break; 1767 } 1768 break; 1769 1770 case MAC_NOTE_CAPAB_CHG: 1771 /* 1772 * This notification is sent whenever the MAC resources 1773 * change or capabilities change. We need to renegotiate 1774 * the capabilities. Send the appropriate DL_NOTIFY_IND. 1775 */ 1776 str_notify_capab_reneg(dsp); 1777 break; 1778 1779 case MAC_NOTE_SDU_SIZE: { 1780 uint_t max_sdu; 1781 mac_sdu_get(dsp->ds_mh, NULL, &max_sdu); 1782 str_notify_sdu_size(dsp, max_sdu); 1783 break; 1784 } 1785 1786 case MAC_NOTE_FASTPATH_FLUSH: 1787 str_notify_fastpath_flush(dsp); 1788 break; 1789 1790 /* Unused notifications */ 1791 case MAC_NOTE_MARGIN: 1792 break; 1793 1794 default: 1795 ASSERT(B_FALSE); 1796 break; 1797 } 1798 } 1799 1800 /* 1801 * This function is called via a taskq mechansim to process all control 1802 * messages on a per 'dsp' end point. 1803 */ 1804 static void 1805 dld_wput_nondata_task(void *arg) 1806 { 1807 dld_str_t *dsp = arg; 1808 mblk_t *mp; 1809 1810 mutex_enter(&dsp->ds_lock); 1811 while (dsp->ds_pending_head != NULL) { 1812 mp = dsp->ds_pending_head; 1813 dsp->ds_pending_head = mp->b_next; 1814 mp->b_next = NULL; 1815 if (dsp->ds_pending_head == NULL) 1816 dsp->ds_pending_tail = NULL; 1817 mutex_exit(&dsp->ds_lock); 1818 1819 switch (DB_TYPE(mp)) { 1820 case M_PROTO: 1821 case M_PCPROTO: 1822 dld_proto(dsp, mp); 1823 break; 1824 case M_IOCTL: 1825 dld_ioc(dsp, mp); 1826 break; 1827 default: 1828 ASSERT(0); 1829 } 1830 1831 mutex_enter(&dsp->ds_lock); 1832 } 1833 ASSERT(dsp->ds_pending_tail == NULL); 1834 dsp->ds_dlpi_pending = 0; 1835 cv_broadcast(&dsp->ds_dlpi_pending_cv); 1836 mutex_exit(&dsp->ds_lock); 1837 } 1838 1839 /* 1840 * Kernel thread to handle taskq dispatch failures in dld_wput_data. This 1841 * thread is started at boot time. 1842 */ 1843 static void 1844 dld_taskq_dispatch(void) 1845 { 1846 callb_cpr_t cprinfo; 1847 dld_str_t *dsp; 1848 1849 CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr, 1850 "dld_taskq_dispatch"); 1851 mutex_enter(&dld_taskq_lock); 1852 1853 while (!dld_taskq_quit) { 1854 dsp = list_head(&dld_taskq_list); 1855 while (dsp != NULL) { 1856 list_remove(&dld_taskq_list, dsp); 1857 mutex_exit(&dld_taskq_lock); 1858 VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task, 1859 dsp, TQ_SLEEP) != 0); 1860 mutex_enter(&dld_taskq_lock); 1861 dsp = list_head(&dld_taskq_list); 1862 } 1863 1864 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1865 cv_wait(&dld_taskq_cv, &dld_taskq_lock); 1866 CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock); 1867 } 1868 1869 dld_taskq_done = B_TRUE; 1870 cv_signal(&dld_taskq_cv); 1871 CALLB_CPR_EXIT(&cprinfo); 1872 thread_exit(); 1873 } 1874 1875 /* 1876 * All control operations are serialized on the 'dsp' and are also funneled 1877 * through a taskq mechanism to ensure that subsequent processing has kernel 1878 * context and can safely use cv_wait. 1879 * 1880 * Mechanisms to handle taskq dispatch failures 1881 * 1882 * The only way to be sure that taskq dispatch does not fail is to either 1883 * specify TQ_SLEEP or to use a static taskq and prepopulate it with 1884 * some number of entries and make sure that the number of outstanding requests 1885 * are less than that number. We can't use TQ_SLEEP since we don't know the 1886 * context. Nor can we bound the total number of 'dsp' end points. So we are 1887 * unable to use either of the above schemes, and are forced to deal with 1888 * taskq dispatch failures. Note that even dynamic taskq could fail in 1889 * dispatch if TQ_NOSLEEP is specified, since this flag is translated 1890 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq 1891 * framework. 1892 * 1893 * We maintain a queue of 'dsp's that encountered taskq dispatch failure. 1894 * We also have a single global thread to retry the taskq dispatch. This 1895 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but 1896 * uses TQ_SLEEP to ensure eventual success of the dispatch operation. 1897 */ 1898 static void 1899 dld_wput_nondata(dld_str_t *dsp, mblk_t *mp) 1900 { 1901 ASSERT(mp->b_next == NULL); 1902 mutex_enter(&dsp->ds_lock); 1903 if (dsp->ds_pending_head != NULL) { 1904 ASSERT(dsp->ds_dlpi_pending); 1905 dsp->ds_pending_tail->b_next = mp; 1906 dsp->ds_pending_tail = mp; 1907 mutex_exit(&dsp->ds_lock); 1908 return; 1909 } 1910 ASSERT(dsp->ds_pending_tail == NULL); 1911 dsp->ds_pending_head = dsp->ds_pending_tail = mp; 1912 /* 1913 * At this point if ds_dlpi_pending is set, it implies that the taskq 1914 * thread is still active and is processing the last message, though 1915 * the pending queue has been emptied. 1916 */ 1917 if (dsp->ds_dlpi_pending) { 1918 mutex_exit(&dsp->ds_lock); 1919 return; 1920 } 1921 1922 dsp->ds_dlpi_pending = 1; 1923 mutex_exit(&dsp->ds_lock); 1924 1925 if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp, 1926 TQ_NOSLEEP) != 0) 1927 return; 1928 1929 mutex_enter(&dld_taskq_lock); 1930 list_insert_tail(&dld_taskq_list, dsp); 1931 cv_signal(&dld_taskq_cv); 1932 mutex_exit(&dld_taskq_lock); 1933 } 1934 1935 /* 1936 * Process an M_IOCTL message. 1937 */ 1938 static void 1939 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1940 { 1941 uint_t cmd; 1942 1943 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1944 ASSERT(dsp->ds_type == DLD_DLPI); 1945 1946 switch (cmd) { 1947 case DLIOCNATIVE: 1948 ioc_native(dsp, mp); 1949 break; 1950 case DLIOCMARGININFO: 1951 ioc_margin(dsp, mp); 1952 break; 1953 case DLIOCRAW: 1954 ioc_raw(dsp, mp); 1955 break; 1956 case DLIOCHDRINFO: 1957 ioc_fast(dsp, mp); 1958 break; 1959 case DLIOCLOWLINK: 1960 ioc_lowlink(dsp, mp); 1961 break; 1962 default: 1963 ioc(dsp, mp); 1964 } 1965 } 1966 1967 /* 1968 * DLIOCNATIVE 1969 */ 1970 static void 1971 ioc_native(dld_str_t *dsp, mblk_t *mp) 1972 { 1973 queue_t *q = dsp->ds_wq; 1974 const mac_info_t *mip = dsp->ds_mip; 1975 1976 /* 1977 * Native mode can be enabled if it's disabled and if the 1978 * native media type is different. 1979 */ 1980 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia) 1981 dsp->ds_native = B_TRUE; 1982 1983 if (dsp->ds_native) 1984 miocack(q, mp, 0, mip->mi_nativemedia); 1985 else 1986 miocnak(q, mp, 0, ENOTSUP); 1987 } 1988 1989 /* 1990 * DLIOCMARGININFO 1991 */ 1992 static void 1993 ioc_margin(dld_str_t *dsp, mblk_t *mp) 1994 { 1995 queue_t *q = dsp->ds_wq; 1996 uint32_t margin; 1997 int err; 1998 1999 if (dsp->ds_dlstate == DL_UNATTACHED) { 2000 err = EINVAL; 2001 goto failed; 2002 } 2003 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0) 2004 goto failed; 2005 2006 mac_margin_get(dsp->ds_mh, &margin); 2007 *((uint32_t *)mp->b_cont->b_rptr) = margin; 2008 miocack(q, mp, sizeof (uint32_t), 0); 2009 return; 2010 2011 failed: 2012 miocnak(q, mp, 0, err); 2013 } 2014 2015 /* 2016 * DLIOCRAW 2017 */ 2018 static void 2019 ioc_raw(dld_str_t *dsp, mblk_t *mp) 2020 { 2021 queue_t *q = dsp->ds_wq; 2022 mac_perim_handle_t mph; 2023 2024 if (dsp->ds_mh == NULL) { 2025 dsp->ds_mode = DLD_RAW; 2026 miocack(q, mp, 0, 0); 2027 return; 2028 } 2029 2030 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 2031 if (dsp->ds_polling || dsp->ds_direct) { 2032 mac_perim_exit(mph); 2033 miocnak(q, mp, 0, EPROTO); 2034 return; 2035 } 2036 2037 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 2038 /* 2039 * Set the receive callback. 2040 */ 2041 dls_rx_set(dsp, dld_str_rx_raw, dsp); 2042 } 2043 2044 /* 2045 * Note that raw mode is enabled. 2046 */ 2047 dsp->ds_mode = DLD_RAW; 2048 mac_perim_exit(mph); 2049 2050 miocack(q, mp, 0, 0); 2051 } 2052 2053 /* 2054 * DLIOCHDRINFO 2055 */ 2056 static void 2057 ioc_fast(dld_str_t *dsp, mblk_t *mp) 2058 { 2059 dl_unitdata_req_t *dlp; 2060 off_t off; 2061 size_t len; 2062 const uint8_t *addr; 2063 uint16_t sap; 2064 mblk_t *nmp; 2065 mblk_t *hmp; 2066 uint_t addr_length; 2067 queue_t *q = dsp->ds_wq; 2068 int err; 2069 mac_perim_handle_t mph; 2070 2071 if (dld_opt & DLD_OPT_NO_FASTPATH) { 2072 err = ENOTSUP; 2073 goto failed; 2074 } 2075 2076 /* 2077 * DLIOCHDRINFO should only come from IP. The one initiated from 2078 * user-land should not be allowed. 2079 */ 2080 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) { 2081 err = EINVAL; 2082 goto failed; 2083 } 2084 2085 nmp = mp->b_cont; 2086 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 2087 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 2088 dlp->dl_primitive != DL_UNITDATA_REQ)) { 2089 err = EINVAL; 2090 goto failed; 2091 } 2092 2093 off = dlp->dl_dest_addr_offset; 2094 len = dlp->dl_dest_addr_length; 2095 2096 if (!MBLKIN(nmp, off, len)) { 2097 err = EINVAL; 2098 goto failed; 2099 } 2100 2101 if (dsp->ds_dlstate != DL_IDLE) { 2102 err = ENOTSUP; 2103 goto failed; 2104 } 2105 2106 addr_length = dsp->ds_mip->mi_addr_length; 2107 if (len != addr_length + sizeof (uint16_t)) { 2108 err = EINVAL; 2109 goto failed; 2110 } 2111 2112 addr = nmp->b_rptr + off; 2113 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 2114 2115 if ((hmp = dls_header(dsp, addr, sap, 0, NULL)) == NULL) { 2116 err = ENOMEM; 2117 goto failed; 2118 } 2119 2120 /* 2121 * This ioctl might happen concurrently with a direct call to dld_capab 2122 * that tries to enable direct and/or poll capabilities. Since the 2123 * stack does not serialize them, we do so here to avoid mixing 2124 * the callbacks. 2125 */ 2126 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 2127 if (dsp->ds_mode != DLD_FASTPATH) { 2128 /* 2129 * Set the receive callback (unless polling is enabled). 2130 */ 2131 if (!dsp->ds_polling && !dsp->ds_direct) 2132 dls_rx_set(dsp, dld_str_rx_fastpath, dsp); 2133 2134 /* 2135 * Note that fast-path mode is enabled. 2136 */ 2137 dsp->ds_mode = DLD_FASTPATH; 2138 } 2139 mac_perim_exit(mph); 2140 2141 freemsg(nmp->b_cont); 2142 nmp->b_cont = hmp; 2143 2144 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 2145 return; 2146 failed: 2147 miocnak(q, mp, 0, err); 2148 } 2149 2150 /* 2151 * DLIOCLOWLINK: request actual link state changes. When the 2152 * link is part of a bridge instance the client receives actual 2153 * link state changes and not the aggregate link status. Used by 2154 * the bridging daemon (bridged) for proper RSTP operation. 2155 */ 2156 static void 2157 ioc_lowlink(dld_str_t *dsp, mblk_t *mp) 2158 { 2159 queue_t *q = dsp->ds_wq; 2160 int err; 2161 2162 if ((err = miocpullup(mp, sizeof (int))) != 0) { 2163 miocnak(q, mp, 0, err); 2164 } else { 2165 /* LINTED: alignment */ 2166 dsp->ds_lowlink = *(boolean_t *)mp->b_cont->b_rptr; 2167 miocack(q, mp, 0, 0); 2168 } 2169 } 2170 2171 /* 2172 * Catch-all handler. 2173 */ 2174 static void 2175 ioc(dld_str_t *dsp, mblk_t *mp) 2176 { 2177 queue_t *q = dsp->ds_wq; 2178 2179 if (dsp->ds_dlstate == DL_UNATTACHED) { 2180 miocnak(q, mp, 0, EINVAL); 2181 return; 2182 } 2183 mac_ioctl(dsp->ds_mh, q, mp); 2184 } 2185