1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Data-Link Driver 27 */ 28 29 #include <inet/common.h> 30 #include <sys/strsubr.h> 31 #include <sys/stropts.h> 32 #include <sys/strsun.h> 33 #include <sys/vlan.h> 34 #include <sys/dld_impl.h> 35 #include <sys/cpuvar.h> 36 #include <sys/callb.h> 37 #include <sys/list.h> 38 #include <sys/mac_client.h> 39 #include <sys/mac_client_priv.h> 40 #include <sys/mac_flow.h> 41 42 static int str_constructor(void *, void *, int); 43 static void str_destructor(void *, void *); 44 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); 45 static void str_notify_promisc_on_phys(dld_str_t *); 46 static void str_notify_promisc_off_phys(dld_str_t *); 47 static void str_notify_phys_addr(dld_str_t *, uint_t, const uint8_t *); 48 static void str_notify_link_up(dld_str_t *); 49 static void str_notify_link_down(dld_str_t *); 50 static void str_notify_capab_reneg(dld_str_t *); 51 static void str_notify_speed(dld_str_t *, uint32_t); 52 53 static void ioc_native(dld_str_t *, mblk_t *); 54 static void ioc_margin(dld_str_t *, mblk_t *); 55 static void ioc_raw(dld_str_t *, mblk_t *); 56 static void ioc_fast(dld_str_t *, mblk_t *); 57 static void ioc_lowlink(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static void dld_wput_nondata(dld_str_t *, mblk_t *); 61 62 static void str_mdata_raw_put(dld_str_t *, mblk_t *); 63 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t, 64 link_tagmode_t); 65 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *, boolean_t); 66 67 static uint32_t str_count; 68 static kmem_cache_t *str_cachep; 69 static mod_hash_t *str_hashp; 70 71 #define STR_HASHSZ 64 72 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 73 74 #define dld_taskq system_taskq 75 76 static kmutex_t dld_taskq_lock; 77 static kcondvar_t dld_taskq_cv; 78 static list_t dld_taskq_list; /* List of dld_str_t */ 79 boolean_t dld_taskq_quit; 80 boolean_t dld_taskq_done; 81 82 static void dld_taskq_dispatch(void); 83 84 /* 85 * Some notes on entry points, flow-control, queueing. 86 * 87 * This driver exports the traditional STREAMS put entry point as well as 88 * the non-STREAMS fast-path transmit routine which is provided to IP via 89 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 90 * and data operations, while the fast-path routine deals only with M_DATA 91 * fast-path packets. Regardless of the entry point, all outbound packets 92 * will end up in DLD_TX(), where they will be delivered to the MAC layer. 
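 *
 * Roughly, the data paths converge as follows (a sketch of the flow,
 * not the exact call chain):
 *
 *	dld_wput(M_DATA), DLD_RAW mode		-> str_mdata_raw_put()
 *	dld_wput(M_DATA), DLD_FASTPATH mode	-> str_mdata_fastpath_put()
 *	IP fast-path (direct call)		-> str_mdata_fastpath_put()
 *
 * and both str_mdata_*_put() routines hand the packet to DLD_TX().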
 *
 * The transmit logic operates in the following way: All packets coming
 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control
 * happens when the MAC layer indicates the packets couldn't be
 * transmitted due to 1) lack of resources (e.g. running out of
 * descriptors), or 2) reaching the allowed bandwidth limit for this
 * particular flow. The indication comes in the form of a Tx cookie that
 * identifies the blocked ring. In such a case, DLD will place a
 * dummy message on its write-side STREAMS queue so that the queue is
 * marked as "full". Any subsequent packets arriving at the driver will
 * still be sent to the MAC layer where they either get queued in the Tx
 * SRS or discarded if the queue limit is exceeded. The write-side STREAMS
 * queue gets enabled when the MAC layer notifies DLD through MAC_NOTE_TX.
 * When the write service procedure runs, it will remove the dummy
 * message from the write-side STREAMS queue; in effect this will trigger
 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
 * respectively, for the reasons above.
 *
 * All non-data operations, both DLPI and ioctls, are single-threaded on a
 * per-dld_str_t endpoint. This is done using a taskq so that the control
 * operation has kernel context and can cv_wait for resources. In addition,
 * all set-type operations that involve mac-level state modification are
 * serialized on a per-mac endpoint using the perimeter mechanism provided
 * by the mac layer. This serializes all mac clients trying to modify a
 * single mac endpoint over the entire sequence of mac calls made by that
 * client as an atomic unit. The mac framework locking is described in
 * mac.c. A critical element is that DLD/DLS does not hold any locks across
 * the mac perimeter.
 *
 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
 * match the dev_t. If a stream is found and it is attached, its dev_info_t *
 * is returned. If the mac handle is non-null, it can be safely accessed
 * below. The mac handle won't be freed until mac_unregister(), which
 * won't happen until the driver detaches. The DDI framework ensures that
 * the detach won't happen while a getinfo is in progress.
 */
typedef struct i_dld_str_state_s {
	major_t		ds_major;
	minor_t		ds_minor;
	int		ds_instance;
	dev_info_t	*ds_dip;
} i_dld_str_state_t;

/* ARGSUSED */
static uint_t
i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_dld_str_state_t	*statep = arg;
	dld_str_t		*dsp = (dld_str_t *)val;
	mac_handle_t		mh;

	if (statep->ds_major != dsp->ds_major)
		return (MH_WALK_CONTINUE);

	ASSERT(statep->ds_minor != 0);
	mh = dsp->ds_mh;

	if (statep->ds_minor == dsp->ds_minor) {
		/*
		 * Clone: a clone minor is unique. We can terminate the
		 * walk if we find a matching stream -- even if we fail
		 * to obtain the devinfo.
		 */
		if (mh != NULL) {
			statep->ds_dip = mac_devinfo_get(mh);
			statep->ds_instance = DLS_MINOR2INST(mac_minor(mh));
		}
		return (MH_WALK_TERMINATE);
	}
	return (MH_WALK_CONTINUE);
}

static dev_info_t *
dld_finddevinfo(dev_t dev)
{
	dev_info_t		*dip;
	i_dld_str_state_t	state;

	if (getminor(dev) == 0)
		return (NULL);

	/*
	 * See if it's a minor node of a link.
	 */
	if ((dip = dls_link_devinfo(dev)) != NULL)
		return (dip);

	state.ds_minor = getminor(dev);
	state.ds_major = getmajor(dev);
	state.ds_dip = NULL;
	state.ds_instance = -1;

	mod_hash_walk(str_hashp, i_dld_str_walker, &state);
	return (state.ds_dip);
}

int
dld_devt_to_instance(dev_t dev)
{
	minor_t			minor;
	i_dld_str_state_t	state;

	/*
	 * GLDv3 numbers the DLPI style 1 node as the instance number + 1.
	 * Minor number 0 is reserved for the DLPI style 2 unattached
	 * node.
	 */

	if ((minor = getminor(dev)) == 0)
		return (-1);

	/*
	 * Check for an unopened style 1 node.
	 * Note that this doesn't *necessarily* work for legacy
	 * devices, but this code is only called within the
	 * getinfo(9e) implementation for true GLDv3 devices, so it
	 * doesn't matter.
	 */
	if (minor > 0 && minor <= DLS_MAX_MINOR) {
		return (DLS_MINOR2INST(minor));
	}

	state.ds_minor = getminor(dev);
	state.ds_major = getmajor(dev);
	state.ds_dip = NULL;
	state.ds_instance = -1;

	mod_hash_walk(str_hashp, i_dld_str_walker, &state);
	return (state.ds_instance);
}

/*
 * devo_getinfo: getinfo(9e)
 *
 * NB: This may be called for a provider before the provider's
 * instances are attached. Hence, if a particular provider needs a
 * special mapping (the mac instance != ddi_get_instance()), then it
 * may need to provide its own implementation using the
 * mac_devt_to_instance() function, and translating the returned mac
 * instance to a devinfo instance. For dev_t's where the minor number
 * is too large (i.e. > MAC_MAX_MINOR), the provider can call this
 * function indirectly via the mac_getinfo() function.
 */
/*ARGSUSED*/
int
dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
{
	dev_info_t	*devinfo;
	minor_t		minor = getminor((dev_t)arg);
	int		rc = DDI_FAILURE;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*(dev_info_t **)resp = devinfo;
			rc = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		if (minor > 0 && minor <= DLS_MAX_MINOR) {
			*resp = (void *)(uintptr_t)DLS_MINOR2INST(minor);
			rc = DDI_SUCCESS;
		} else if (minor > DLS_MAX_MINOR &&
		    (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*resp = (void *)(uintptr_t)ddi_get_instance(devinfo);
			rc = DDI_SUCCESS;
		}
		break;
	}
	return (rc);
}

void *
dld_str_private(queue_t *q)
{
	return (((dld_str_t *)(q->q_ptr))->ds_private);
}

int
dld_str_open(queue_t *rq, dev_t *devp, void *private)
{
	dld_str_t	*dsp;
	major_t		major;
	minor_t		minor;
	int		err;

	major = getmajor(*devp);
	minor = getminor(*devp);

	/*
	 * Create a new dld_str_t for the stream. This will grab a new minor
	 * number that will be handed back in the cloned dev_t. Creation may
	 * fail if we can't allocate the dummy mblk used for flow-control.
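	 * (A minor number of zero on the opened dev_t selects the DLPI
	 * style-2 node; any other minor is treated as a style-1 node and
	 * is attached below to PPA "minor - 1".)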
286 */ 287 dsp = dld_str_create(rq, DLD_DLPI, major, 288 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 289 if (dsp == NULL) 290 return (ENOSR); 291 292 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 293 dsp->ds_private = private; 294 if (minor != 0) { 295 /* 296 * Style 1 open 297 */ 298 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 299 goto failed; 300 301 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 302 } else { 303 (void) qassociate(rq, -1); 304 } 305 306 /* 307 * Enable the queue srv(9e) routine. 308 */ 309 qprocson(rq); 310 311 /* 312 * Construct a cloned dev_t to hand back. 313 */ 314 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 315 return (0); 316 317 failed: 318 dld_str_destroy(dsp); 319 return (err); 320 } 321 322 int 323 dld_str_close(queue_t *rq) 324 { 325 dld_str_t *dsp = rq->q_ptr; 326 327 /* 328 * All modules on top have been popped off. So there can't be any 329 * threads from the top. 330 */ 331 ASSERT(dsp->ds_datathr_cnt == 0); 332 333 /* 334 * Wait until pending DLPI requests are processed. 335 */ 336 mutex_enter(&dsp->ds_lock); 337 while (dsp->ds_dlpi_pending) 338 cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock); 339 mutex_exit(&dsp->ds_lock); 340 341 342 /* 343 * This stream was open to a provider node. Check to see 344 * if it has been cleanly shut down. 345 */ 346 if (dsp->ds_dlstate != DL_UNATTACHED) { 347 /* 348 * The stream is either open to a style 1 provider or 349 * this is not clean shutdown. Detach from the PPA. 350 * (This is still ok even in the style 1 case). 351 */ 352 dld_str_detach(dsp); 353 } 354 355 dld_str_destroy(dsp); 356 return (0); 357 } 358 359 /* 360 * qi_qopen: open(9e) 361 */ 362 /*ARGSUSED*/ 363 int 364 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 365 { 366 if (sflag == MODOPEN) 367 return (ENOTSUP); 368 369 /* 370 * This is a cloning driver and therefore each queue should only 371 * ever get opened once. 372 */ 373 if (rq->q_ptr != NULL) 374 return (EBUSY); 375 376 return (dld_str_open(rq, devp, NULL)); 377 } 378 379 /* 380 * qi_qclose: close(9e) 381 */ 382 /* ARGSUSED */ 383 int 384 dld_close(queue_t *rq, int flags __unused, cred_t *credp __unused) 385 { 386 /* 387 * Disable the queue srv(9e) routine. 
388 */ 389 qprocsoff(rq); 390 391 return (dld_str_close(rq)); 392 } 393 394 /* 395 * qi_qputp: put(9e) 396 */ 397 int 398 dld_wput(queue_t *wq, mblk_t *mp) 399 { 400 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 401 dld_str_mode_t mode; 402 403 switch (DB_TYPE(mp)) { 404 case M_DATA: 405 mutex_enter(&dsp->ds_lock); 406 mode = dsp->ds_mode; 407 if ((dsp->ds_dlstate != DL_IDLE) || 408 (mode != DLD_FASTPATH && mode != DLD_RAW)) { 409 mutex_exit(&dsp->ds_lock); 410 freemsg(mp); 411 break; 412 } 413 414 DLD_DATATHR_INC(dsp); 415 mutex_exit(&dsp->ds_lock); 416 if (mode == DLD_FASTPATH) { 417 if (dsp->ds_mip->mi_media == DL_ETHER && 418 (MBLKL(mp) < sizeof (struct ether_header))) { 419 freemsg(mp); 420 } else { 421 (void) str_mdata_fastpath_put(dsp, mp, 0, 0); 422 } 423 } else { 424 str_mdata_raw_put(dsp, mp); 425 } 426 DLD_DATATHR_DCR(dsp); 427 break; 428 case M_PROTO: 429 case M_PCPROTO: { 430 t_uscalar_t prim; 431 432 if (MBLKL(mp) < sizeof (t_uscalar_t)) 433 break; 434 435 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; 436 437 if (prim == DL_UNITDATA_REQ) { 438 proto_unitdata_req(dsp, mp); 439 } else { 440 dld_wput_nondata(dsp, mp); 441 } 442 break; 443 } 444 445 case M_IOCTL: 446 dld_wput_nondata(dsp, mp); 447 break; 448 449 case M_FLUSH: 450 if (*mp->b_rptr & FLUSHW) { 451 DLD_CLRQFULL(dsp); 452 *mp->b_rptr &= ~FLUSHW; 453 } 454 455 if (*mp->b_rptr & FLUSHR) { 456 qreply(wq, mp); 457 } else { 458 freemsg(mp); 459 } 460 break; 461 462 default: 463 freemsg(mp); 464 break; 465 } 466 return (0); 467 } 468 469 /* 470 * qi_srvp: srv(9e) 471 */ 472 int 473 dld_wsrv(queue_t *wq) 474 { 475 dld_str_t *dsp = wq->q_ptr; 476 477 DLD_CLRQFULL(dsp); 478 return (0); 479 } 480 481 void 482 dld_init_ops(struct dev_ops *ops, const char *name) 483 { 484 struct streamtab *stream; 485 struct qinit *rq, *wq; 486 struct module_info *modinfo; 487 488 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 489 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 490 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 491 modinfo->mi_minpsz = 0; 492 modinfo->mi_maxpsz = 64*1024; 493 modinfo->mi_hiwat = 1; 494 modinfo->mi_lowat = 0; 495 496 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 497 rq->qi_qopen = dld_open; 498 rq->qi_qclose = dld_close; 499 rq->qi_minfo = modinfo; 500 501 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 502 wq->qi_putp = (pfi_t)dld_wput; 503 wq->qi_srvp = (pfi_t)dld_wsrv; 504 wq->qi_minfo = modinfo; 505 506 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 507 stream->st_rdinit = rq; 508 stream->st_wrinit = wq; 509 ops->devo_cb_ops->cb_str = stream; 510 511 if (ops->devo_getinfo == NULL) 512 ops->devo_getinfo = &dld_getinfo; 513 } 514 515 void 516 dld_fini_ops(struct dev_ops *ops) 517 { 518 struct streamtab *stream; 519 struct qinit *rq, *wq; 520 struct module_info *modinfo; 521 522 stream = ops->devo_cb_ops->cb_str; 523 rq = stream->st_rdinit; 524 wq = stream->st_wrinit; 525 modinfo = rq->qi_minfo; 526 ASSERT(wq->qi_minfo == modinfo); 527 528 kmem_free(stream, sizeof (struct streamtab)); 529 kmem_free(wq, sizeof (struct qinit)); 530 kmem_free(rq, sizeof (struct qinit)); 531 kmem_free(modinfo->mi_idname, FMNAMESZ); 532 kmem_free(modinfo, sizeof (struct module_info)); 533 } 534 535 /* 536 * Initialize this module's data structures. 537 */ 538 void 539 dld_str_init(void) 540 { 541 /* 542 * Create dld_str_t object cache. 
543 */ 544 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 545 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 546 ASSERT(str_cachep != NULL); 547 548 /* 549 * Create a hash table for maintaining dld_str_t's. 550 * The ds_minor field (the clone minor number) of a dld_str_t 551 * is used as a key for this hash table because this number is 552 * globally unique (allocated from "dls_minor_arena"). 553 */ 554 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 555 mod_hash_null_valdtor); 556 557 mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL); 558 cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL); 559 560 dld_taskq_quit = B_FALSE; 561 dld_taskq_done = B_FALSE; 562 list_create(&dld_taskq_list, sizeof (dld_str_t), 563 offsetof(dld_str_t, ds_tqlist)); 564 (void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0, 565 &p0, TS_RUN, minclsyspri); 566 } 567 568 /* 569 * Tear down this module's data structures. 570 */ 571 int 572 dld_str_fini(void) 573 { 574 /* 575 * Make sure that there are no objects in use. 576 */ 577 if (str_count != 0) 578 return (EBUSY); 579 580 /* 581 * Ask the dld_taskq thread to quit and wait for it to be done 582 */ 583 mutex_enter(&dld_taskq_lock); 584 dld_taskq_quit = B_TRUE; 585 cv_signal(&dld_taskq_cv); 586 while (!dld_taskq_done) 587 cv_wait(&dld_taskq_cv, &dld_taskq_lock); 588 mutex_exit(&dld_taskq_lock); 589 list_destroy(&dld_taskq_list); 590 /* 591 * Destroy object cache. 592 */ 593 kmem_cache_destroy(str_cachep); 594 mod_hash_destroy_idhash(str_hashp); 595 return (0); 596 } 597 598 /* 599 * Create a new dld_str_t object. 600 */ 601 dld_str_t * 602 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 603 { 604 dld_str_t *dsp; 605 int err; 606 607 /* 608 * Allocate an object from the cache. 609 */ 610 atomic_inc_32(&str_count); 611 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 612 613 /* 614 * Allocate the dummy mblk for flow-control. 615 */ 616 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 617 if (dsp->ds_tx_flow_mp == NULL) { 618 kmem_cache_free(str_cachep, dsp); 619 atomic_dec_32(&str_count); 620 return (NULL); 621 } 622 dsp->ds_type = type; 623 dsp->ds_major = major; 624 dsp->ds_style = style; 625 626 /* 627 * Initialize the queue pointers. 628 */ 629 ASSERT(RD(rq) == rq); 630 dsp->ds_rq = rq; 631 dsp->ds_wq = WR(rq); 632 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 633 634 /* 635 * We want explicit control over our write-side STREAMS queue 636 * where the dummy mblk gets added/removed for flow-control. 637 */ 638 noenable(WR(rq)); 639 640 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor), 641 (mod_hash_val_t)dsp); 642 ASSERT(err == 0); 643 return (dsp); 644 } 645 646 /* 647 * Destroy a dld_str_t object. 648 */ 649 void 650 dld_str_destroy(dld_str_t *dsp) 651 { 652 queue_t *rq; 653 queue_t *wq; 654 mod_hash_val_t val; 655 656 /* 657 * Clear the queue pointers. 
	 */
	rq = dsp->ds_rq;
	wq = dsp->ds_wq;
	ASSERT(wq == WR(rq));
	rq->q_ptr = wq->q_ptr = NULL;
	dsp->ds_rq = dsp->ds_wq = NULL;

	ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
	ASSERT(dsp->ds_sap == 0);
	ASSERT(dsp->ds_mh == NULL);
	ASSERT(dsp->ds_mch == NULL);
	ASSERT(dsp->ds_promisc == 0);
	ASSERT(dsp->ds_mph == NULL);
	ASSERT(dsp->ds_mip == NULL);
	ASSERT(dsp->ds_mnh == NULL);

	ASSERT(dsp->ds_polling == B_FALSE);
	ASSERT(dsp->ds_direct == B_FALSE);
	ASSERT(dsp->ds_lso == B_FALSE);
	ASSERT(dsp->ds_lso_max == 0);
	ASSERT(dsp->ds_passivestate != DLD_ACTIVE);

	/*
	 * Reinitialize all the flags.
	 */
	dsp->ds_notifications = 0;
	dsp->ds_passivestate = DLD_UNINITIALIZED;
	dsp->ds_mode = DLD_UNITDATA;
	dsp->ds_native = B_FALSE;
	dsp->ds_nonip = B_FALSE;

	ASSERT(dsp->ds_datathr_cnt == 0);
	ASSERT(dsp->ds_pending_head == NULL);
	ASSERT(dsp->ds_pending_tail == NULL);
	ASSERT(!dsp->ds_dlpi_pending);

	ASSERT(dsp->ds_dlp == NULL);
	ASSERT(dsp->ds_dmap == NULL);
	ASSERT(dsp->ds_rx == NULL);
	ASSERT(dsp->ds_rx_arg == NULL);
	ASSERT(dsp->ds_next == NULL);
	ASSERT(dsp->ds_head == NULL);

	/*
	 * Free the dummy mblk if it exists.
	 */
	if (dsp->ds_tx_flow_mp != NULL) {
		freeb(dsp->ds_tx_flow_mp);
		dsp->ds_tx_flow_mp = NULL;
	}

	(void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val);
	ASSERT(dsp == (dld_str_t *)val);

	/*
	 * Free the object back to the cache.
	 */
	kmem_cache_free(str_cachep, dsp);
	atomic_dec_32(&str_count);
}

/*
 * kmem_cache constructor function: see kmem_cache_create(9f).
 */
/*ARGSUSED*/
static int
str_constructor(void *buf, void *cdrarg, int kmflags)
{
	dld_str_t	*dsp = buf;

	bzero(buf, sizeof (dld_str_t));

	/*
	 * Allocate a new minor number.
	 */
	if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0)
		return (-1);

	/*
	 * Initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;

	mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL);
	cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL);

	return (0);
}

/*
 * kmem_cache destructor function.
 */
/*ARGSUSED*/
static void
str_destructor(void *buf, void *cdrarg)
{
	dld_str_t	*dsp = buf;

	/*
	 * Release the minor number.
	 */
	mac_minor_rele(dsp->ds_minor);

	ASSERT(dsp->ds_tx_flow_mp == NULL);

	mutex_destroy(&dsp->ds_lock);
	cv_destroy(&dsp->ds_datathr_cv);
	cv_destroy(&dsp->ds_dlpi_pending_cv);
}

/*
 * Update the priority bits and VID (may need to insert a tag if mp points
 * to an untagged packet).
 * If vid is VLAN_ID_NONE, use the VID encoded in the packet.
 */
static mblk_t *
i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
    link_tagmode_t tagmode)
{
	mblk_t *hmp;
	struct ether_vlan_header *evhp;
	struct ether_header *ehp;
	uint16_t old_tci = 0;
	size_t len;

	ASSERT(pri != 0 || vid != VLAN_ID_NONE);

	evhp = (struct ether_vlan_header *)mp->b_rptr;
	if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
		/*
		 * Tagged packet, update the priority bits.
790 */ 791 len = sizeof (struct ether_vlan_header); 792 793 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) { 794 /* 795 * In case some drivers only check the db_ref 796 * count of the first mblk, we pullup the 797 * message into a single mblk. 798 */ 799 hmp = msgpullup(mp, -1); 800 if ((hmp == NULL) || (MBLKL(hmp) < len)) { 801 freemsg(hmp); 802 return (NULL); 803 } else { 804 freemsg(mp); 805 mp = hmp; 806 } 807 } 808 809 evhp = (struct ether_vlan_header *)mp->b_rptr; 810 old_tci = ntohs(evhp->ether_tci); 811 } else { 812 /* 813 * Untagged packet. Two factors will cause us to insert a 814 * VLAN header: 815 * - This is a VLAN link (vid is specified) 816 * - The link supports user priority tagging and the priority 817 * is non-zero. 818 */ 819 if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY) 820 return (mp); 821 822 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 823 if (hmp == NULL) 824 return (NULL); 825 826 evhp = (struct ether_vlan_header *)hmp->b_rptr; 827 ehp = (struct ether_header *)mp->b_rptr; 828 829 /* 830 * Copy the MAC addresses and typelen 831 */ 832 bcopy(ehp, evhp, (ETHERADDRL * 2)); 833 evhp->ether_type = ehp->ether_type; 834 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 835 836 hmp->b_wptr += sizeof (struct ether_vlan_header); 837 mp->b_rptr += sizeof (struct ether_header); 838 839 /* 840 * Free the original message if it's now empty. Link the 841 * rest of the messages to the header message. 842 */ 843 if (MBLKL(mp) == 0) { 844 hmp->b_cont = mp->b_cont; 845 freeb(mp); 846 } else { 847 hmp->b_cont = mp; 848 } 849 mp = hmp; 850 } 851 852 if (pri == 0) 853 pri = VLAN_PRI(old_tci); 854 if (vid == VLAN_ID_NONE) 855 vid = VLAN_ID(old_tci); 856 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid)); 857 return (mp); 858 } 859 860 /* 861 * M_DATA put (IP fast-path mode) 862 */ 863 mac_tx_cookie_t 864 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint, 865 uint16_t flag) 866 { 867 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 868 mblk_t *newmp; 869 uint_t pri; 870 mac_tx_cookie_t cookie; 871 872 if (is_ethernet) { 873 /* 874 * Update the priority bits to the assigned priority. 875 */ 876 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp); 877 878 if (pri != 0) { 879 newmp = i_dld_ether_header_update_tag(mp, pri, 880 VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode); 881 if (newmp == NULL) 882 goto discard; 883 mp = newmp; 884 } 885 } 886 887 if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != 0) { 888 DLD_SETQFULL(dsp); 889 } 890 return (cookie); 891 892 discard: 893 /* TODO: bump kstat? */ 894 freemsg(mp); 895 return (0); 896 } 897 898 /* 899 * M_DATA put (DLIOCRAW mode) 900 */ 901 static void 902 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 903 { 904 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 905 mblk_t *bp, *newmp; 906 size_t size; 907 mac_header_info_t mhi; 908 uint_t pri, vid, dvid; 909 uint_t max_sdu; 910 911 /* 912 * Certain MAC type plugins provide an illusion for raw DLPI 913 * consumers. They pretend that the MAC layer is something that 914 * it's not for the benefit of observability tools. For example, 915 * mac_wifi pretends that it's Ethernet for such consumers. 916 * Here, unless native mode is enabled, we call into the MAC layer so 917 * that this illusion can be maintained. The plugin will optionally 918 * transform the MAC header here into something that can be passed 919 * down. The header goes from raw mode to "cooked" mode. 
	 */
	if (!dsp->ds_native) {
		if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL)
			goto discard;
		mp = newmp;
	}

	size = MBLKL(mp);

	/*
	 * Check the packet is not too big and that any remaining
	 * fragment list is composed entirely of M_DATA messages. (We
	 * know the first fragment was M_DATA, otherwise we could not
	 * have got here.)
	 */
	for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) {
		if (DB_TYPE(bp) != M_DATA)
			goto discard;
		size += MBLKL(bp);
	}

	if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
		goto discard;

	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
	/*
	 * If LSO is enabled, check the size against lso_max. Otherwise,
	 * compare the packet size with max_sdu.
	 */
	max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu;
	if (size > max_sdu + mhi.mhi_hdrsize)
		goto discard;

	if (is_ethernet) {
		dvid = mac_client_vid(dsp->ds_mch);

		/*
		 * Discard the packet if this is a VLAN stream but the VID in
		 * the packet is not correct.
		 */
		vid = VLAN_ID(mhi.mhi_tci);
		if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
			goto discard;

		/*
		 * Discard the packet if this packet is a tagged packet
		 * but both pri and VID are 0.
		 */
		pri = VLAN_PRI(mhi.mhi_tci);
		if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 &&
		    vid == VLAN_ID_NONE)
			goto discard;

		/*
		 * Update the priority bits to the per-stream priority if
		 * priority is not set in the packet. Update the VID for
		 * packets on a VLAN stream.
		 */
		pri = (pri == 0) ? dsp->ds_pri : 0;
		if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
			if ((newmp = i_dld_ether_header_update_tag(mp, pri,
			    dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
				goto discard;
			}
			mp = newmp;
		}
	}

	if (DLD_TX(dsp, mp, 0, 0) != 0) {
		/* Turn on flow-control for dld */
		DLD_SETQFULL(dsp);
	}
	return;

discard:
	/* TODO: bump kstat? */
	freemsg(mp);
}

/*
 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
 */
int
dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
{
	dev_t			dev;
	int			err;
	const char		*drvname;
	mac_perim_handle_t	mph = NULL;
	boolean_t		qassociated = B_FALSE;
	dls_link_t		*dlp = NULL;
	dls_dl_handle_t		ddp = NULL;

	if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
		return (EINVAL);

	if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA)
		return (ENOTSUP);

	/*
	 * /dev node access. This will still be supported for backward
	 * compatibility reasons.
	 */
	if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) &&
	    (strcmp(drvname, "vnic") != 0)) {
		if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
			return (EINVAL);
		qassociated = B_TRUE;
	}

	dev = makedevice(dsp->ds_major, (minor_t)ppa + 1);
	if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0)
		goto failed;

	if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0)
		goto failed;

	/*
	 * Open a channel.
	 */
	if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0)
		goto failed;

	if ((err = dls_open(dlp, ddp, dsp)) != 0)
		goto failed;

	/*
	 * Set the default packet priority.
	 */
	dsp->ds_pri = 0;

	/*
	 * Add a notify function so that we get updates from the MAC.
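	 * (str_notify() below translates those MAC notifications into the
	 * DL_NOTIFY_IND messages that the DLPI consumer asked for.)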
	 */
	dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp);
	dsp->ds_dlstate = DL_UNBOUND;
	mac_perim_exit(mph);
	return (0);

failed:
	if (dlp != NULL)
		dls_link_rele(dlp);
	if (mph != NULL)
		mac_perim_exit(mph);
	if (ddp != NULL)
		dls_devnet_rele(ddp);
	if (qassociated)
		(void) qassociate(dsp->ds_wq, -1);

	return (err);
}

/*
 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
 * from close(2) for style 2.
 */
void
dld_str_detach(dld_str_t *dsp)
{
	mac_perim_handle_t	mph;
	int			err;

	ASSERT(dsp->ds_datathr_cnt == 0);

	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	/*
	 * Remove the notify function.
	 *
	 * Note that we cannot wait for the notification callback to be
	 * removed, since that could cause a deadlock with str_notify(), as
	 * they both need the mac perimeter. Continue if we cannot remove the
	 * notification callback right now and wait after we leave the
	 * perimeter.
	 */
	err = mac_notify_remove(dsp->ds_mnh, B_FALSE);
	dsp->ds_mnh = NULL;

	/*
	 * Disable the capabilities.
	 */
	dld_capabilities_disable(dsp);

	/*
	 * Clear LSO flags.
	 */
	dsp->ds_lso = B_FALSE;
	dsp->ds_lso_max = 0;

	dls_close(dsp);
	mac_perim_exit(mph);

	/*
	 * Now we leave the mac perimeter. If mac_notify_remove() failed
	 * because the notification callback was in progress, wait for
	 * it to finish before we proceed.
	 */
	if (err != 0)
		mac_notify_remove_wait(dsp->ds_mh);

	/*
	 * An unreferenced tagged (non-persistent) vlan gets destroyed
	 * automatically in the call to dls_devnet_rele.
	 */
	dls_devnet_rele(dsp->ds_ddh);

	dsp->ds_sap = 0;
	dsp->ds_mh = NULL;
	dsp->ds_mch = NULL;
	dsp->ds_mip = NULL;

	if (dsp->ds_style == DL_STYLE2)
		(void) qassociate(dsp->ds_wq, -1);

	/*
	 * Re-initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;
}

/*
 * This function is only called for VLAN streams. In raw mode, we strip VLAN
 * tags before sending packets up to the DLS clients, with the exception of
 * special priority-tagged packets, in which case we set the VID to 0.
 * mp must be a VLAN tagged packet.
 */
static mblk_t *
i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri)
{
	mblk_t *newmp;
	struct ether_vlan_header *evhp;
	uint16_t tci, new_tci;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
	if (DB_REF(mp) > 1) {
		newmp = copymsg(mp);
		if (newmp == NULL)
			return (NULL);
		freemsg(mp);
		mp = newmp;
	}
	evhp = (struct ether_vlan_header *)mp->b_rptr;

	tci = ntohs(evhp->ether_tci);
	if (VLAN_PRI(tci) == 0 || !keep_pri) {
		/*
		 * Priority is 0, strip the tag.
		 */
		ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
		mp->b_rptr += VLAN_TAGSZ;
	} else {
		/*
		 * Priority is not 0, update the VID to 0.
		 */
		new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
		evhp->ether_tci = htons(new_tci);
	}
	return (mp);
}

/*
 * Raw mode receive function.
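 *
 * For each packet in the chain we rewind b_rptr over the MAC header,
 * optionally "uncook" the header via the MAC plugin, strip the VLAN tag
 * for VLAN streams, and then pass the raw frame upstream.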
1181 */ 1182 /*ARGSUSED*/ 1183 void 1184 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1185 mac_header_info_t *mhip) 1186 { 1187 dld_str_t *dsp = (dld_str_t *)arg; 1188 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 1189 mblk_t *next, *newmp; 1190 1191 ASSERT(mp != NULL); 1192 do { 1193 /* 1194 * Get the pointer to the next packet in the chain and then 1195 * clear b_next before the packet gets passed on. 1196 */ 1197 next = mp->b_next; 1198 mp->b_next = NULL; 1199 1200 /* 1201 * Wind back b_rptr to point at the MAC header. 1202 */ 1203 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1204 mp->b_rptr -= mhip->mhi_hdrsize; 1205 1206 /* 1207 * Certain MAC type plugins provide an illusion for raw 1208 * DLPI consumers. They pretend that the MAC layer is 1209 * something that it's not for the benefit of observability 1210 * tools. For example, mac_wifi pretends that it's Ethernet 1211 * for such consumers. Here, unless native mode is enabled, 1212 * we call into the MAC layer so that this illusion can be 1213 * maintained. The plugin will optionally transform the MAC 1214 * header here into something that can be passed up to raw 1215 * consumers. The header goes from "cooked" mode to raw mode. 1216 */ 1217 if (!dsp->ds_native) { 1218 newmp = mac_header_uncook(dsp->ds_mh, mp); 1219 if (newmp == NULL) { 1220 freemsg(mp); 1221 goto next; 1222 } 1223 mp = newmp; 1224 } 1225 1226 /* 1227 * Strip the VLAN tag for VLAN streams. 1228 */ 1229 if (is_ethernet && 1230 mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) { 1231 /* 1232 * The priority should be kept only for VLAN 1233 * data-links. 1234 */ 1235 newmp = i_dld_ether_header_strip_tag(mp, 1236 mac_client_is_vlan_vnic(dsp->ds_mch)); 1237 if (newmp == NULL) { 1238 freemsg(mp); 1239 goto next; 1240 } 1241 mp = newmp; 1242 } 1243 1244 /* 1245 * Pass the packet on. 1246 */ 1247 if (canputnext(dsp->ds_rq)) 1248 putnext(dsp->ds_rq, mp); 1249 else 1250 freemsg(mp); 1251 1252 next: 1253 /* 1254 * Move on to the next packet in the chain. 1255 */ 1256 mp = next; 1257 } while (mp != NULL); 1258 } 1259 1260 /* 1261 * Fast-path receive function. 1262 */ 1263 /*ARGSUSED*/ 1264 void 1265 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1266 mac_header_info_t *mhip) 1267 { 1268 dld_str_t *dsp = (dld_str_t *)arg; 1269 mblk_t *next; 1270 size_t offset = 0; 1271 1272 /* 1273 * MAC header stripping rules: 1274 * - Tagged packets: 1275 * a. VLAN streams. Strip the whole VLAN header including the tag. 1276 * b. Physical streams 1277 * - VLAN packets (non-zero VID). The stream must be either a 1278 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener. 1279 * Strip the Ethernet header but keep the VLAN header. 1280 * - Special tagged packets (zero VID) 1281 * * The stream is either a DL_PROMISC_SAP listener or a 1282 * ETHERTYPE_VLAN listener, strip the Ethernet header but 1283 * keep the VLAN header. 1284 * * Otherwise, strip the whole VLAN header. 1285 * - Untagged packets. Strip the whole MAC header. 1286 */ 1287 if (mhip->mhi_istagged && 1288 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) && 1289 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1290 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1291 offset = VLAN_TAGSZ; 1292 } 1293 1294 ASSERT(mp != NULL); 1295 do { 1296 /* 1297 * Get the pointer to the next packet in the chain and then 1298 * clear b_next before the packet gets passed on. 1299 */ 1300 next = mp->b_next; 1301 mp->b_next = NULL; 1302 1303 /* 1304 * Wind back b_rptr to point at the VLAN header. 
1305 */ 1306 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset); 1307 mp->b_rptr -= offset; 1308 1309 /* 1310 * Pass the packet on. 1311 */ 1312 if (canputnext(dsp->ds_rq)) 1313 putnext(dsp->ds_rq, mp); 1314 else 1315 freemsg(mp); 1316 /* 1317 * Move on to the next packet in the chain. 1318 */ 1319 mp = next; 1320 } while (mp != NULL); 1321 } 1322 1323 /* 1324 * Default receive function (send DL_UNITDATA_IND messages). 1325 */ 1326 /*ARGSUSED*/ 1327 void 1328 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1329 mac_header_info_t *mhip) 1330 { 1331 dld_str_t *dsp = (dld_str_t *)arg; 1332 mblk_t *ud_mp; 1333 mblk_t *next; 1334 size_t offset = 0; 1335 boolean_t strip_vlan = B_TRUE; 1336 1337 /* 1338 * See MAC header stripping rules in the dld_str_rx_fastpath() function. 1339 */ 1340 if (mhip->mhi_istagged && 1341 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) && 1342 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1343 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1344 offset = VLAN_TAGSZ; 1345 strip_vlan = B_FALSE; 1346 } 1347 1348 ASSERT(mp != NULL); 1349 do { 1350 /* 1351 * Get the pointer to the next packet in the chain and then 1352 * clear b_next before the packet gets passed on. 1353 */ 1354 next = mp->b_next; 1355 mp->b_next = NULL; 1356 1357 /* 1358 * Wind back b_rptr to point at the MAC header. 1359 */ 1360 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1361 mp->b_rptr -= mhip->mhi_hdrsize; 1362 1363 /* 1364 * Create the DL_UNITDATA_IND M_PROTO. 1365 */ 1366 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) { 1367 freemsgchain(mp); 1368 return; 1369 } 1370 1371 /* 1372 * Advance b_rptr to point at the payload (or the VLAN header). 1373 */ 1374 mp->b_rptr += (mhip->mhi_hdrsize - offset); 1375 1376 /* 1377 * Prepend the DL_UNITDATA_IND. 1378 */ 1379 ud_mp->b_cont = mp; 1380 1381 /* 1382 * Send the message. 1383 */ 1384 if (canputnext(dsp->ds_rq)) 1385 putnext(dsp->ds_rq, ud_mp); 1386 else 1387 freemsg(ud_mp); 1388 1389 /* 1390 * Move on to the next packet in the chain. 1391 */ 1392 mp = next; 1393 } while (mp != NULL); 1394 } 1395 1396 /* 1397 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE 1398 */ 1399 static void 1400 str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu, uint_t multicast_sdu) 1401 { 1402 mblk_t *mp; 1403 dl_notify_ind_t *dlip; 1404 1405 if (!(dsp->ds_notifications & (DL_NOTE_SDU_SIZE|DL_NOTE_SDU_SIZE2))) 1406 return; 1407 1408 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1409 M_PROTO, 0)) == NULL) 1410 return; 1411 1412 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1413 dlip = (dl_notify_ind_t *)mp->b_rptr; 1414 dlip->dl_primitive = DL_NOTIFY_IND; 1415 if (dsp->ds_notifications & DL_NOTE_SDU_SIZE2) { 1416 dlip->dl_notification = DL_NOTE_SDU_SIZE2; 1417 dlip->dl_data1 = max_sdu; 1418 dlip->dl_data2 = multicast_sdu; 1419 } else { 1420 dlip->dl_notification = DL_NOTE_SDU_SIZE; 1421 dlip->dl_data = max_sdu; 1422 } 1423 1424 qreply(dsp->ds_wq, mp); 1425 } 1426 1427 /* 1428 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1429 * current state of the interface. 1430 */ 1431 void 1432 dld_str_notify_ind(dld_str_t *dsp) 1433 { 1434 mac_notify_type_t type; 1435 1436 for (type = 0; type < MAC_NNOTE; type++) 1437 str_notify(dsp, type); 1438 } 1439 1440 typedef struct dl_unitdata_ind_wrapper { 1441 dl_unitdata_ind_t dl_unitdata; 1442 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1443 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1444 } dl_unitdata_ind_wrapper_t; 1445 1446 /* 1447 * Create a DL_UNITDATA_IND M_PROTO message. 
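 *
 * The message is allocated as a dl_unitdata_ind_wrapper_t (defined above):
 * the dl_unitdata_ind_t is followed by buffers holding the destination and
 * source DLSAP addresses, each a MAC address with the 16-bit SAP appended.
 * The dl_*_addr_offset and dl_*_addr_length fields filled in below refer
 * to those buffers.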
1448 */ 1449 static mblk_t * 1450 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan) 1451 { 1452 mblk_t *nmp; 1453 dl_unitdata_ind_wrapper_t *dlwp; 1454 dl_unitdata_ind_t *dlp; 1455 mac_header_info_t mhi; 1456 uint_t addr_length; 1457 uint8_t *daddr; 1458 uint8_t *saddr; 1459 1460 /* 1461 * Get the packet header information. 1462 */ 1463 if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0) 1464 return (NULL); 1465 1466 /* 1467 * Allocate a message large enough to contain the wrapper structure 1468 * defined above. 1469 */ 1470 if ((nmp = mexchange(dsp->ds_wq, NULL, 1471 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1472 DL_UNITDATA_IND)) == NULL) 1473 return (NULL); 1474 1475 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1476 1477 dlp = &(dlwp->dl_unitdata); 1478 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1479 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1480 1481 /* 1482 * Copy in the destination address. 1483 */ 1484 addr_length = dsp->ds_mip->mi_addr_length; 1485 daddr = dlwp->dl_dest_addr; 1486 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1487 bcopy(mhi.mhi_daddr, daddr, addr_length); 1488 1489 /* 1490 * Set the destination DLSAP to the SAP value encoded in the packet. 1491 */ 1492 if (mhi.mhi_istagged && !strip_vlan) 1493 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN; 1494 else 1495 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap; 1496 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1497 1498 /* 1499 * If the destination address was multicast or broadcast then the 1500 * dl_group_address field should be non-zero. 1501 */ 1502 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) || 1503 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST); 1504 1505 /* 1506 * Copy in the source address if one exists. Some MAC types (DL_IB 1507 * for example) may not have access to source information. 1508 */ 1509 if (mhi.mhi_saddr == NULL) { 1510 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0; 1511 } else { 1512 saddr = dlwp->dl_src_addr; 1513 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1514 bcopy(mhi.mhi_saddr, saddr, addr_length); 1515 1516 /* 1517 * Set the source DLSAP to the packet ethertype. 
1518 */ 1519 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap; 1520 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1521 } 1522 1523 return (nmp); 1524 } 1525 1526 /* 1527 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1528 */ 1529 static void 1530 str_notify_promisc_on_phys(dld_str_t *dsp) 1531 { 1532 mblk_t *mp; 1533 dl_notify_ind_t *dlip; 1534 1535 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1536 return; 1537 1538 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1539 M_PROTO, 0)) == NULL) 1540 return; 1541 1542 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1543 dlip = (dl_notify_ind_t *)mp->b_rptr; 1544 dlip->dl_primitive = DL_NOTIFY_IND; 1545 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1546 1547 qreply(dsp->ds_wq, mp); 1548 } 1549 1550 /* 1551 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1552 */ 1553 static void 1554 str_notify_promisc_off_phys(dld_str_t *dsp) 1555 { 1556 mblk_t *mp; 1557 dl_notify_ind_t *dlip; 1558 1559 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1560 return; 1561 1562 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1563 M_PROTO, 0)) == NULL) 1564 return; 1565 1566 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1567 dlip = (dl_notify_ind_t *)mp->b_rptr; 1568 dlip->dl_primitive = DL_NOTIFY_IND; 1569 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1570 1571 qreply(dsp->ds_wq, mp); 1572 } 1573 1574 /* 1575 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1576 */ 1577 static void 1578 str_notify_phys_addr(dld_str_t *dsp, uint_t addr_type, const uint8_t *addr) 1579 { 1580 mblk_t *mp; 1581 dl_notify_ind_t *dlip; 1582 uint_t addr_length; 1583 uint16_t ethertype; 1584 1585 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1586 return; 1587 1588 addr_length = dsp->ds_mip->mi_addr_length; 1589 if ((mp = mexchange(dsp->ds_wq, NULL, 1590 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1591 M_PROTO, 0)) == NULL) 1592 return; 1593 1594 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1595 dlip = (dl_notify_ind_t *)mp->b_rptr; 1596 dlip->dl_primitive = DL_NOTIFY_IND; 1597 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1598 dlip->dl_data = addr_type; 1599 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1600 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1601 1602 bcopy(addr, &dlip[1], addr_length); 1603 1604 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 
0 : dsp->ds_sap; 1605 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype; 1606 1607 qreply(dsp->ds_wq, mp); 1608 } 1609 1610 /* 1611 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1612 */ 1613 static void 1614 str_notify_link_up(dld_str_t *dsp) 1615 { 1616 mblk_t *mp; 1617 dl_notify_ind_t *dlip; 1618 1619 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1620 return; 1621 1622 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1623 M_PROTO, 0)) == NULL) 1624 return; 1625 1626 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1627 dlip = (dl_notify_ind_t *)mp->b_rptr; 1628 dlip->dl_primitive = DL_NOTIFY_IND; 1629 dlip->dl_notification = DL_NOTE_LINK_UP; 1630 1631 qreply(dsp->ds_wq, mp); 1632 } 1633 1634 /* 1635 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1636 */ 1637 static void 1638 str_notify_link_down(dld_str_t *dsp) 1639 { 1640 mblk_t *mp; 1641 dl_notify_ind_t *dlip; 1642 1643 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1644 return; 1645 1646 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1647 M_PROTO, 0)) == NULL) 1648 return; 1649 1650 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1651 dlip = (dl_notify_ind_t *)mp->b_rptr; 1652 dlip->dl_primitive = DL_NOTIFY_IND; 1653 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1654 1655 qreply(dsp->ds_wq, mp); 1656 } 1657 1658 /* 1659 * DL_NOTIFY_IND: DL_NOTE_SPEED 1660 */ 1661 static void 1662 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1663 { 1664 mblk_t *mp; 1665 dl_notify_ind_t *dlip; 1666 1667 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1668 return; 1669 1670 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1671 M_PROTO, 0)) == NULL) 1672 return; 1673 1674 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1675 dlip = (dl_notify_ind_t *)mp->b_rptr; 1676 dlip->dl_primitive = DL_NOTIFY_IND; 1677 dlip->dl_notification = DL_NOTE_SPEED; 1678 dlip->dl_data = speed; 1679 1680 qreply(dsp->ds_wq, mp); 1681 } 1682 1683 /* 1684 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1685 */ 1686 static void 1687 str_notify_capab_reneg(dld_str_t *dsp) 1688 { 1689 mblk_t *mp; 1690 dl_notify_ind_t *dlip; 1691 1692 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1693 return; 1694 1695 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1696 M_PROTO, 0)) == NULL) 1697 return; 1698 1699 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1700 dlip = (dl_notify_ind_t *)mp->b_rptr; 1701 dlip->dl_primitive = DL_NOTIFY_IND; 1702 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1703 1704 qreply(dsp->ds_wq, mp); 1705 } 1706 1707 /* 1708 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH 1709 */ 1710 static void 1711 str_notify_fastpath_flush(dld_str_t *dsp) 1712 { 1713 mblk_t *mp; 1714 dl_notify_ind_t *dlip; 1715 1716 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH)) 1717 return; 1718 1719 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1720 M_PROTO, 0)) == NULL) 1721 return; 1722 1723 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1724 dlip = (dl_notify_ind_t *)mp->b_rptr; 1725 dlip->dl_primitive = DL_NOTIFY_IND; 1726 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH; 1727 1728 qreply(dsp->ds_wq, mp); 1729 } 1730 1731 static void 1732 str_notify_allowed_ips(dld_str_t *dsp) 1733 { 1734 mblk_t *mp; 1735 dl_notify_ind_t *dlip; 1736 size_t mp_size; 1737 mac_protect_t *mrp; 1738 1739 if (!(dsp->ds_notifications & DL_NOTE_ALLOWED_IPS)) 1740 return; 1741 1742 mp_size = sizeof (mac_protect_t) + sizeof (dl_notify_ind_t); 1743 if ((mp = mexchange(dsp->ds_wq, NULL, mp_size, M_PROTO, 0)) == NULL) 1744 return; 1745 1746 mrp = mac_protect_get(dsp->ds_mh); 
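	/*
	 * The dl_notify_ind_t is immediately followed by a copy of the
	 * current mac_protect_t; dl_addr_offset and dl_addr_length below
	 * describe that payload.
	 */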
1747 bzero(mp->b_rptr, mp_size); 1748 dlip = (dl_notify_ind_t *)mp->b_rptr; 1749 dlip->dl_primitive = DL_NOTIFY_IND; 1750 dlip->dl_notification = DL_NOTE_ALLOWED_IPS; 1751 dlip->dl_data = 0; 1752 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1753 dlip->dl_addr_length = sizeof (mac_protect_t); 1754 bcopy(mrp, mp->b_rptr + sizeof (dl_notify_ind_t), 1755 sizeof (mac_protect_t)); 1756 1757 qreply(dsp->ds_wq, mp); 1758 } 1759 1760 /* 1761 * MAC notification callback. 1762 */ 1763 void 1764 str_notify(void *arg, mac_notify_type_t type) 1765 { 1766 dld_str_t *dsp = (dld_str_t *)arg; 1767 queue_t *q = dsp->ds_wq; 1768 mac_handle_t mh = dsp->ds_mh; 1769 mac_client_handle_t mch = dsp->ds_mch; 1770 uint8_t addr[MAXMACADDRLEN]; 1771 1772 switch (type) { 1773 case MAC_NOTE_TX: 1774 qenable(q); 1775 break; 1776 1777 case MAC_NOTE_DEVPROMISC: 1778 /* 1779 * Send the appropriate DL_NOTIFY_IND. 1780 */ 1781 if (mac_promisc_get(mh)) 1782 str_notify_promisc_on_phys(dsp); 1783 else 1784 str_notify_promisc_off_phys(dsp); 1785 break; 1786 1787 case MAC_NOTE_UNICST: 1788 /* 1789 * This notification is sent whenever the MAC unicast 1790 * address changes. 1791 */ 1792 mac_unicast_primary_get(mh, addr); 1793 1794 /* 1795 * Send the appropriate DL_NOTIFY_IND. 1796 */ 1797 str_notify_phys_addr(dsp, DL_CURR_PHYS_ADDR, addr); 1798 break; 1799 1800 case MAC_NOTE_DEST: 1801 /* 1802 * Only send up DL_NOTE_DEST_ADDR if the link has a 1803 * destination address. 1804 */ 1805 if (mac_dst_get(dsp->ds_mh, addr)) 1806 str_notify_phys_addr(dsp, DL_CURR_DEST_ADDR, addr); 1807 break; 1808 1809 case MAC_NOTE_LOWLINK: 1810 case MAC_NOTE_LINK: 1811 /* 1812 * LOWLINK refers to the actual link status. For links that 1813 * are not part of a bridge instance LOWLINK and LINK state 1814 * are the same. But for a link part of a bridge instance 1815 * LINK state refers to the aggregate link status: "up" when 1816 * at least one link part of the bridge is up and is "down" 1817 * when all links part of the bridge are down. 1818 * 1819 * Clients can request to be notified of the LOWLINK state 1820 * using the DLIOCLOWLINK ioctl. Clients such as the bridge 1821 * daemon request lowlink state changes and upper layer clients 1822 * receive notifications of the aggregate link state changes 1823 * which is the default when requesting LINK UP/DOWN state 1824 * notifications. 1825 */ 1826 1827 /* 1828 * Check that the notification type matches the one that we 1829 * want. If we want lower-level link notifications, and this 1830 * is upper, or if we want upper and this is lower, then 1831 * ignore. 1832 */ 1833 if ((type == MAC_NOTE_LOWLINK) != dsp->ds_lowlink) 1834 break; 1835 /* 1836 * This notification is sent every time the MAC driver 1837 * updates the link state. 1838 */ 1839 switch (mac_client_stat_get(mch, dsp->ds_lowlink ? 1840 MAC_STAT_LOWLINK_STATE : MAC_STAT_LINK_STATE)) { 1841 case LINK_STATE_UP: { 1842 uint64_t speed; 1843 /* 1844 * The link is up so send the appropriate 1845 * DL_NOTIFY_IND. 1846 */ 1847 str_notify_link_up(dsp); 1848 1849 speed = mac_stat_get(mh, MAC_STAT_IFSPEED); 1850 str_notify_speed(dsp, (uint32_t)(speed / 1000ull)); 1851 break; 1852 } 1853 case LINK_STATE_DOWN: 1854 /* 1855 * The link is down so send the appropriate 1856 * DL_NOTIFY_IND. 1857 */ 1858 str_notify_link_down(dsp); 1859 break; 1860 1861 default: 1862 break; 1863 } 1864 break; 1865 1866 case MAC_NOTE_CAPAB_CHG: 1867 /* 1868 * This notification is sent whenever the MAC resources 1869 * change or capabilities change. 
We need to renegotiate
		 * the capabilities. Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_capab_reneg(dsp);
		break;

	case MAC_NOTE_SDU_SIZE: {
		uint_t max_sdu;
		uint_t multicast_sdu;
		mac_sdu_get2(dsp->ds_mh, NULL, &max_sdu, &multicast_sdu);
		str_notify_sdu_size(dsp, max_sdu, multicast_sdu);
		break;
	}

	case MAC_NOTE_FASTPATH_FLUSH:
		str_notify_fastpath_flush(dsp);
		break;

	/* Unused notifications */
	case MAC_NOTE_MARGIN:
		break;

	case MAC_NOTE_ALLOWED_IPS:
		str_notify_allowed_ips(dsp);
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}
}

/*
 * This function is called via a taskq mechanism to process all control
 * messages on a per-'dsp' endpoint.
 */
static void
dld_wput_nondata_task(void *arg)
{
	dld_str_t	*dsp = arg;
	mblk_t		*mp;

	mutex_enter(&dsp->ds_lock);
	while (dsp->ds_pending_head != NULL) {
		mp = dsp->ds_pending_head;
		dsp->ds_pending_head = mp->b_next;
		mp->b_next = NULL;
		if (dsp->ds_pending_head == NULL)
			dsp->ds_pending_tail = NULL;
		mutex_exit(&dsp->ds_lock);

		switch (DB_TYPE(mp)) {
		case M_PROTO:
		case M_PCPROTO:
			dld_proto(dsp, mp);
			break;
		case M_IOCTL:
			dld_ioc(dsp, mp);
			break;
		default:
			ASSERT(0);
		}

		mutex_enter(&dsp->ds_lock);
	}
	ASSERT(dsp->ds_pending_tail == NULL);
	dsp->ds_dlpi_pending = 0;
	cv_broadcast(&dsp->ds_dlpi_pending_cv);
	mutex_exit(&dsp->ds_lock);
}

/*
 * Kernel thread to handle taskq dispatch failures in dld_wput_nondata. This
 * thread is started at boot time.
 */
static void
dld_taskq_dispatch(void)
{
	callb_cpr_t	cprinfo;
	dld_str_t	*dsp;

	CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr,
	    "dld_taskq_dispatch");
	mutex_enter(&dld_taskq_lock);

	while (!dld_taskq_quit) {
		dsp = list_head(&dld_taskq_list);
		while (dsp != NULL) {
			list_remove(&dld_taskq_list, dsp);
			mutex_exit(&dld_taskq_lock);
			VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task,
			    dsp, TQ_SLEEP) != TASKQID_INVALID);
			mutex_enter(&dld_taskq_lock);
			dsp = list_head(&dld_taskq_list);
		}

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&dld_taskq_cv, &dld_taskq_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock);
	}

	dld_taskq_done = B_TRUE;
	cv_signal(&dld_taskq_cv);
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

/*
 * All control operations are serialized on the 'dsp' and are also funneled
 * through a taskq mechanism to ensure that subsequent processing has kernel
 * context and can safely use cv_wait.
 *
 * Mechanisms to handle taskq dispatch failures
 *
 * The only way to be sure that taskq dispatch does not fail is to either
 * specify TQ_SLEEP or to use a static taskq, prepopulate it with
 * some number of entries, and make sure that the number of outstanding
 * requests is less than that number. We can't use TQ_SLEEP since we don't
 * know the context. Nor can we bound the total number of 'dsp' endpoints.
 * So we are unable to use either of the above schemes, and are forced to
 * deal with taskq dispatch failures.
Note that even dynamic taskq could fail in 1990 * dispatch if TQ_NOSLEEP is specified, since this flag is translated 1991 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq 1992 * framework. 1993 * 1994 * We maintain a queue of 'dsp's that encountered taskq dispatch failure. 1995 * We also have a single global thread to retry the taskq dispatch. This 1996 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but 1997 * uses TQ_SLEEP to ensure eventual success of the dispatch operation. 1998 */ 1999 static void 2000 dld_wput_nondata(dld_str_t *dsp, mblk_t *mp) 2001 { 2002 ASSERT(mp->b_next == NULL); 2003 mutex_enter(&dsp->ds_lock); 2004 if (dsp->ds_pending_head != NULL) { 2005 ASSERT(dsp->ds_dlpi_pending); 2006 dsp->ds_pending_tail->b_next = mp; 2007 dsp->ds_pending_tail = mp; 2008 mutex_exit(&dsp->ds_lock); 2009 return; 2010 } 2011 ASSERT(dsp->ds_pending_tail == NULL); 2012 dsp->ds_pending_head = dsp->ds_pending_tail = mp; 2013 /* 2014 * At this point if ds_dlpi_pending is set, it implies that the taskq 2015 * thread is still active and is processing the last message, though 2016 * the pending queue has been emptied. 2017 */ 2018 if (dsp->ds_dlpi_pending) { 2019 mutex_exit(&dsp->ds_lock); 2020 return; 2021 } 2022 2023 dsp->ds_dlpi_pending = 1; 2024 mutex_exit(&dsp->ds_lock); 2025 2026 if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp, 2027 TQ_NOSLEEP) != TASKQID_INVALID) 2028 return; 2029 2030 mutex_enter(&dld_taskq_lock); 2031 list_insert_tail(&dld_taskq_list, dsp); 2032 cv_signal(&dld_taskq_cv); 2033 mutex_exit(&dld_taskq_lock); 2034 } 2035 2036 /* 2037 * Process an M_IOCTL message. 2038 */ 2039 static void 2040 dld_ioc(dld_str_t *dsp, mblk_t *mp) 2041 { 2042 uint_t cmd; 2043 2044 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 2045 ASSERT(dsp->ds_type == DLD_DLPI); 2046 2047 switch (cmd) { 2048 case DLIOCNATIVE: 2049 ioc_native(dsp, mp); 2050 break; 2051 case DLIOCMARGININFO: 2052 ioc_margin(dsp, mp); 2053 break; 2054 case DLIOCRAW: 2055 ioc_raw(dsp, mp); 2056 break; 2057 case DLIOCHDRINFO: 2058 ioc_fast(dsp, mp); 2059 break; 2060 case DLIOCLOWLINK: 2061 ioc_lowlink(dsp, mp); 2062 break; 2063 default: 2064 ioc(dsp, mp); 2065 } 2066 } 2067 2068 /* 2069 * DLIOCNATIVE 2070 */ 2071 static void 2072 ioc_native(dld_str_t *dsp, mblk_t *mp) 2073 { 2074 queue_t *q = dsp->ds_wq; 2075 const mac_info_t *mip = dsp->ds_mip; 2076 2077 /* 2078 * Native mode can be enabled if it's disabled and if the 2079 * native media type is different. 
2080 */ 2081 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia) 2082 dsp->ds_native = B_TRUE; 2083 2084 if (dsp->ds_native) 2085 miocack(q, mp, 0, mip->mi_nativemedia); 2086 else 2087 miocnak(q, mp, 0, ENOTSUP); 2088 } 2089 2090 /* 2091 * DLIOCMARGININFO 2092 */ 2093 static void 2094 ioc_margin(dld_str_t *dsp, mblk_t *mp) 2095 { 2096 queue_t *q = dsp->ds_wq; 2097 uint32_t margin; 2098 int err; 2099 2100 if (dsp->ds_dlstate == DL_UNATTACHED) { 2101 err = EINVAL; 2102 goto failed; 2103 } 2104 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0) 2105 goto failed; 2106 2107 mac_margin_get(dsp->ds_mh, &margin); 2108 *((uint32_t *)mp->b_cont->b_rptr) = margin; 2109 miocack(q, mp, sizeof (uint32_t), 0); 2110 return; 2111 2112 failed: 2113 miocnak(q, mp, 0, err); 2114 } 2115 2116 /* 2117 * DLIOCRAW 2118 */ 2119 static void 2120 ioc_raw(dld_str_t *dsp, mblk_t *mp) 2121 { 2122 queue_t *q = dsp->ds_wq; 2123 mac_perim_handle_t mph; 2124 2125 if (dsp->ds_mh == NULL) { 2126 dsp->ds_mode = DLD_RAW; 2127 miocack(q, mp, 0, 0); 2128 return; 2129 } 2130 2131 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 2132 if (dsp->ds_polling || dsp->ds_direct) { 2133 mac_perim_exit(mph); 2134 miocnak(q, mp, 0, EPROTO); 2135 return; 2136 } 2137 2138 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 2139 /* 2140 * Set the receive callback. 2141 */ 2142 dls_rx_set(dsp, dld_str_rx_raw, dsp); 2143 } 2144 2145 /* 2146 * Note that raw mode is enabled. 2147 */ 2148 dsp->ds_mode = DLD_RAW; 2149 mac_perim_exit(mph); 2150 2151 miocack(q, mp, 0, 0); 2152 } 2153 2154 /* 2155 * DLIOCHDRINFO 2156 */ 2157 static void 2158 ioc_fast(dld_str_t *dsp, mblk_t *mp) 2159 { 2160 dl_unitdata_req_t *dlp; 2161 off_t off; 2162 size_t len; 2163 const uint8_t *addr; 2164 uint16_t sap; 2165 mblk_t *nmp; 2166 mblk_t *hmp; 2167 uint_t addr_length; 2168 queue_t *q = dsp->ds_wq; 2169 int err; 2170 mac_perim_handle_t mph; 2171 2172 if (dld_opt & DLD_OPT_NO_FASTPATH) { 2173 err = ENOTSUP; 2174 goto failed; 2175 } 2176 2177 /* 2178 * DLIOCHDRINFO should only come from IP. The one initiated from 2179 * user-land should not be allowed. 2180 */ 2181 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) { 2182 err = EINVAL; 2183 goto failed; 2184 } 2185 2186 nmp = mp->b_cont; 2187 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 2188 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 2189 dlp->dl_primitive != DL_UNITDATA_REQ)) { 2190 err = EINVAL; 2191 goto failed; 2192 } 2193 2194 off = dlp->dl_dest_addr_offset; 2195 len = dlp->dl_dest_addr_length; 2196 2197 if (!MBLKIN(nmp, off, len)) { 2198 err = EINVAL; 2199 goto failed; 2200 } 2201 2202 if (dsp->ds_dlstate != DL_IDLE) { 2203 err = ENOTSUP; 2204 goto failed; 2205 } 2206 2207 addr_length = dsp->ds_mip->mi_addr_length; 2208 if (len != addr_length + sizeof (uint16_t)) { 2209 err = EINVAL; 2210 goto failed; 2211 } 2212 2213 addr = nmp->b_rptr + off; 2214 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 2215 2216 if ((hmp = dls_header(dsp, addr, sap, 0, NULL)) == NULL) { 2217 err = ENOMEM; 2218 goto failed; 2219 } 2220 2221 /* 2222 * This ioctl might happen concurrently with a direct call to dld_capab 2223 * that tries to enable direct and/or poll capabilities. Since the 2224 * stack does not serialize them, we do so here to avoid mixing 2225 * the callbacks. 2226 */ 2227 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 2228 if (dsp->ds_mode != DLD_FASTPATH) { 2229 /* 2230 * Set the receive callback (unless polling is enabled). 
2231 */ 2232 if (!dsp->ds_polling && !dsp->ds_direct) 2233 dls_rx_set(dsp, dld_str_rx_fastpath, dsp); 2234 2235 /* 2236 * Note that fast-path mode is enabled. 2237 */ 2238 dsp->ds_mode = DLD_FASTPATH; 2239 } 2240 mac_perim_exit(mph); 2241 2242 freemsg(nmp->b_cont); 2243 nmp->b_cont = hmp; 2244 2245 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 2246 return; 2247 failed: 2248 miocnak(q, mp, 0, err); 2249 } 2250 2251 /* 2252 * DLIOCLOWLINK: request actual link state changes. When the 2253 * link is part of a bridge instance the client receives actual 2254 * link state changes and not the aggregate link status. Used by 2255 * the bridging daemon (bridged) for proper RSTP operation. 2256 */ 2257 static void 2258 ioc_lowlink(dld_str_t *dsp, mblk_t *mp) 2259 { 2260 queue_t *q = dsp->ds_wq; 2261 int err; 2262 2263 if ((err = miocpullup(mp, sizeof (int))) != 0) { 2264 miocnak(q, mp, 0, err); 2265 } else { 2266 /* LINTED: alignment */ 2267 dsp->ds_lowlink = *(boolean_t *)mp->b_cont->b_rptr; 2268 miocack(q, mp, 0, 0); 2269 } 2270 } 2271 2272 /* 2273 * Catch-all handler. 2274 */ 2275 static void 2276 ioc(dld_str_t *dsp, mblk_t *mp) 2277 { 2278 queue_t *q = dsp->ds_wq; 2279 2280 if (dsp->ds_dlstate == DL_UNATTACHED) { 2281 miocnak(q, mp, 0, EINVAL); 2282 return; 2283 } 2284 mac_ioctl(dsp->ds_mh, q, mp); 2285 } 2286