1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Data-Link Driver 27 */ 28 29 #include <inet/common.h> 30 #include <sys/strsubr.h> 31 #include <sys/stropts.h> 32 #include <sys/strsun.h> 33 #include <sys/vlan.h> 34 #include <sys/dld_impl.h> 35 #include <sys/cpuvar.h> 36 #include <sys/callb.h> 37 #include <sys/list.h> 38 #include <sys/mac_client.h> 39 #include <sys/mac_client_priv.h> 40 #include <sys/mac_flow.h> 41 42 static int str_constructor(void *, void *, int); 43 static void str_destructor(void *, void *); 44 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); 45 static void str_notify_promisc_on_phys(dld_str_t *); 46 static void str_notify_promisc_off_phys(dld_str_t *); 47 static void str_notify_phys_addr(dld_str_t *, uint_t, const uint8_t *); 48 static void str_notify_link_up(dld_str_t *); 49 static void str_notify_link_down(dld_str_t *); 50 static void str_notify_capab_reneg(dld_str_t *); 51 static void str_notify_speed(dld_str_t *, uint32_t); 52 53 static void ioc_native(dld_str_t *, mblk_t *); 54 static void ioc_margin(dld_str_t *, mblk_t *); 55 
static void ioc_raw(dld_str_t *, mblk_t *); 56 static void ioc_fast(dld_str_t *, mblk_t *); 57 static void ioc_lowlink(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static void dld_wput_nondata(dld_str_t *, mblk_t *); 61 62 static void str_mdata_raw_put(dld_str_t *, mblk_t *); 63 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t, 64 link_tagmode_t); 65 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *, boolean_t); 66 67 static uint32_t str_count; 68 static kmem_cache_t *str_cachep; 69 static mod_hash_t *str_hashp; 70 71 #define STR_HASHSZ 64 72 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 73 74 #define dld_taskq system_taskq 75 76 static kmutex_t dld_taskq_lock; 77 static kcondvar_t dld_taskq_cv; 78 static list_t dld_taskq_list; /* List of dld_str_t */ 79 boolean_t dld_taskq_quit; 80 boolean_t dld_taskq_done; 81 82 static void dld_taskq_dispatch(void); 83 84 /* 85 * Some notes on entry points, flow-control, queueing. 86 * 87 * This driver exports the traditional STREAMS put entry point as well as 88 * the non-STREAMS fast-path transmit routine which is provided to IP via 89 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 90 * and data operations, while the fast-path routine deals only with M_DATA 91 * fast-path packets. Regardless of the entry point, all outbound packets 92 * will end up in DLD_TX(), where they will be delivered to the MAC layer. 93 * 94 * The transmit logic operates in the following way: All packets coming 95 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control 96 * happens when the MAC layer indicates the packets couldn't be 97 * transmitted due to 1) lack of resources (e.g. running out of 98 * descriptors), or 2) reaching the allowed bandwidth limit for this 99 * particular flow. The indication comes in the form of a Tx cookie that 100 * identifies the blocked ring. 
In such a case, DLD will place a
 * dummy message on its write-side STREAMS queue so that the queue is
 * marked as "full". Any subsequent packets arriving at the driver will
 * still be sent to the MAC layer, where they are either queued in the Tx
 * SRS or discarded if the queue limit is exceeded. The write-side STREAMS
 * queue gets enabled when the MAC layer notifies DLD through MAC_NOTE_TX.
 * When the write service procedure runs, it will remove the dummy
 * message from the write-side STREAMS queue; in effect this will trigger
 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
 * respectively, due to the above reasons.
 *
 * All non-data operations, both DLPI and ioctls, are single threaded on a
 * per dld_str_t endpoint. This is done using a taskq so that the control
 * operation has kernel context and can cv_wait for resources. In addition,
 * all set type operations that involve mac level state modification are
 * serialized on a per mac end point using the perimeter mechanism provided
 * by the mac layer. This serializes all mac clients trying to modify a
 * single mac end point over the entire sequence of mac calls made by that
 * client as an atomic unit. The mac framework locking is described in
 * mac.c. A critical element is that DLD/DLS does not hold any locks across
 * the mac perimeter.
 *
 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
 * match dev_t. If a stream is found and it is attached, its dev_info_t *
 * is returned. If the mac handle is non-null, it can be safely accessed
 * below. The mac handle won't be freed until the mac_unregister which
 * won't happen until the driver detaches. The DDI framework ensures that
 * the detach won't happen while a getinfo is in progress.
128 */ 129 typedef struct i_dld_str_state_s { 130 major_t ds_major; 131 minor_t ds_minor; 132 int ds_instance; 133 dev_info_t *ds_dip; 134 } i_dld_str_state_t; 135 136 /* ARGSUSED */ 137 static uint_t 138 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 139 { 140 i_dld_str_state_t *statep = arg; 141 dld_str_t *dsp = (dld_str_t *)val; 142 mac_handle_t mh; 143 144 if (statep->ds_major != dsp->ds_major) 145 return (MH_WALK_CONTINUE); 146 147 ASSERT(statep->ds_minor != 0); 148 mh = dsp->ds_mh; 149 150 if (statep->ds_minor == dsp->ds_minor) { 151 /* 152 * Clone: a clone minor is unique. we can terminate the 153 * walk if we find a matching stream -- even if we fail 154 * to obtain the devinfo. 155 */ 156 if (mh != NULL) { 157 statep->ds_dip = mac_devinfo_get(mh); 158 statep->ds_instance = DLS_MINOR2INST(mac_minor(mh)); 159 } 160 return (MH_WALK_TERMINATE); 161 } 162 return (MH_WALK_CONTINUE); 163 } 164 165 static dev_info_t * 166 dld_finddevinfo(dev_t dev) 167 { 168 dev_info_t *dip; 169 i_dld_str_state_t state; 170 171 if (getminor(dev) == 0) 172 return (NULL); 173 174 /* 175 * See if it's a minor node of a link 176 */ 177 if ((dip = dls_link_devinfo(dev)) != NULL) 178 return (dip); 179 180 state.ds_minor = getminor(dev); 181 state.ds_major = getmajor(dev); 182 state.ds_dip = NULL; 183 state.ds_instance = -1; 184 185 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 186 return (state.ds_dip); 187 } 188 189 int 190 dld_devt_to_instance(dev_t dev) 191 { 192 minor_t minor; 193 i_dld_str_state_t state; 194 195 /* 196 * GLDv3 numbers DLPI style 1 node as the instance number + 1. 197 * Minor number 0 is reserved for the DLPI style 2 unattached 198 * node. 199 */ 200 201 if ((minor = getminor(dev)) == 0) 202 return (-1); 203 204 /* 205 * Check for unopened style 1 node. 
206 * Note that this doesn't *necessarily* work for legacy 207 * devices, but this code is only called within the 208 * getinfo(9e) implementation for true GLDv3 devices, so it 209 * doesn't matter. 210 */ 211 if (minor > 0 && minor <= DLS_MAX_MINOR) { 212 return (DLS_MINOR2INST(minor)); 213 } 214 215 state.ds_minor = getminor(dev); 216 state.ds_major = getmajor(dev); 217 state.ds_dip = NULL; 218 state.ds_instance = -1; 219 220 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 221 return (state.ds_instance); 222 } 223 224 /* 225 * devo_getinfo: getinfo(9e) 226 * 227 * NB: This may be called for a provider before the provider's 228 * instances are attached. Hence, if a particular provider needs a 229 * special mapping (the mac instance != ddi_get_instance()), then it 230 * may need to provide its own implmentation using the 231 * mac_devt_to_instance() function, and translating the returned mac 232 * instance to a devinfo instance. For dev_t's where the minor number 233 * is too large (i.e. > MAC_MAX_MINOR), the provider can call this 234 * function indirectly via the mac_getinfo() function. 
235 */ 236 /*ARGSUSED*/ 237 int 238 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 239 { 240 dev_info_t *devinfo; 241 minor_t minor = getminor((dev_t)arg); 242 int rc = DDI_FAILURE; 243 244 switch (cmd) { 245 case DDI_INFO_DEVT2DEVINFO: 246 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 247 *(dev_info_t **)resp = devinfo; 248 rc = DDI_SUCCESS; 249 } 250 break; 251 case DDI_INFO_DEVT2INSTANCE: 252 if (minor > 0 && minor <= DLS_MAX_MINOR) { 253 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor); 254 rc = DDI_SUCCESS; 255 } else if (minor > DLS_MAX_MINOR && 256 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 257 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); 258 rc = DDI_SUCCESS; 259 } 260 break; 261 } 262 return (rc); 263 } 264 265 void * 266 dld_str_private(queue_t *q) 267 { 268 return (((dld_str_t *)(q->q_ptr))->ds_private); 269 } 270 271 int 272 dld_str_open(queue_t *rq, dev_t *devp, void *private) 273 { 274 dld_str_t *dsp; 275 major_t major; 276 minor_t minor; 277 int err; 278 279 major = getmajor(*devp); 280 minor = getminor(*devp); 281 282 /* 283 * Create a new dld_str_t for the stream. This will grab a new minor 284 * number that will be handed back in the cloned dev_t. Creation may 285 * fail if we can't allocate the dummy mblk used for flow-control. 286 */ 287 dsp = dld_str_create(rq, DLD_DLPI, major, 288 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 289 if (dsp == NULL) 290 return (ENOSR); 291 292 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 293 dsp->ds_private = private; 294 if (minor != 0) { 295 /* 296 * Style 1 open 297 */ 298 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 299 goto failed; 300 301 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 302 } else { 303 (void) qassociate(rq, -1); 304 } 305 306 /* 307 * Enable the queue srv(9e) routine. 308 */ 309 qprocson(rq); 310 311 /* 312 * Construct a cloned dev_t to hand back. 
313 */ 314 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 315 return (0); 316 317 failed: 318 dld_str_destroy(dsp); 319 return (err); 320 } 321 322 int 323 dld_str_close(queue_t *rq) 324 { 325 dld_str_t *dsp = rq->q_ptr; 326 327 /* 328 * All modules on top have been popped off. So there can't be any 329 * threads from the top. 330 */ 331 ASSERT(dsp->ds_datathr_cnt == 0); 332 333 /* 334 * Wait until pending DLPI requests are processed. 335 */ 336 mutex_enter(&dsp->ds_lock); 337 while (dsp->ds_dlpi_pending) 338 cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock); 339 mutex_exit(&dsp->ds_lock); 340 341 342 /* 343 * This stream was open to a provider node. Check to see 344 * if it has been cleanly shut down. 345 */ 346 if (dsp->ds_dlstate != DL_UNATTACHED) { 347 /* 348 * The stream is either open to a style 1 provider or 349 * this is not clean shutdown. Detach from the PPA. 350 * (This is still ok even in the style 1 case). 351 */ 352 dld_str_detach(dsp); 353 } 354 355 dld_str_destroy(dsp); 356 return (0); 357 } 358 359 /* 360 * qi_qopen: open(9e) 361 */ 362 /*ARGSUSED*/ 363 int 364 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 365 { 366 if (sflag == MODOPEN) 367 return (ENOTSUP); 368 369 /* 370 * This is a cloning driver and therefore each queue should only 371 * ever get opened once. 372 */ 373 if (rq->q_ptr != NULL) 374 return (EBUSY); 375 376 return (dld_str_open(rq, devp, NULL)); 377 } 378 379 /* 380 * qi_qclose: close(9e) 381 */ 382 int 383 dld_close(queue_t *rq) 384 { 385 /* 386 * Disable the queue srv(9e) routine. 
387 */ 388 qprocsoff(rq); 389 390 return (dld_str_close(rq)); 391 } 392 393 /* 394 * qi_qputp: put(9e) 395 */ 396 void 397 dld_wput(queue_t *wq, mblk_t *mp) 398 { 399 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 400 dld_str_mode_t mode; 401 402 switch (DB_TYPE(mp)) { 403 case M_DATA: 404 mutex_enter(&dsp->ds_lock); 405 mode = dsp->ds_mode; 406 if ((dsp->ds_dlstate != DL_IDLE) || 407 (mode != DLD_FASTPATH && mode != DLD_RAW)) { 408 mutex_exit(&dsp->ds_lock); 409 freemsg(mp); 410 break; 411 } 412 413 DLD_DATATHR_INC(dsp); 414 mutex_exit(&dsp->ds_lock); 415 if (mode == DLD_FASTPATH) { 416 if (dsp->ds_mip->mi_media == DL_ETHER && 417 (MBLKL(mp) < sizeof (struct ether_header))) { 418 freemsg(mp); 419 } else { 420 (void) str_mdata_fastpath_put(dsp, mp, 0, 0); 421 } 422 } else { 423 str_mdata_raw_put(dsp, mp); 424 } 425 DLD_DATATHR_DCR(dsp); 426 break; 427 case M_PROTO: 428 case M_PCPROTO: { 429 t_uscalar_t prim; 430 431 if (MBLKL(mp) < sizeof (t_uscalar_t)) 432 break; 433 434 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; 435 436 if (prim == DL_UNITDATA_REQ) { 437 proto_unitdata_req(dsp, mp); 438 } else { 439 dld_wput_nondata(dsp, mp); 440 } 441 break; 442 } 443 444 case M_IOCTL: 445 dld_wput_nondata(dsp, mp); 446 break; 447 448 case M_FLUSH: 449 if (*mp->b_rptr & FLUSHW) { 450 DLD_CLRQFULL(dsp); 451 *mp->b_rptr &= ~FLUSHW; 452 } 453 454 if (*mp->b_rptr & FLUSHR) { 455 qreply(wq, mp); 456 } else { 457 freemsg(mp); 458 } 459 break; 460 461 default: 462 freemsg(mp); 463 break; 464 } 465 } 466 467 /* 468 * qi_srvp: srv(9e) 469 */ 470 void 471 dld_wsrv(queue_t *wq) 472 { 473 dld_str_t *dsp = wq->q_ptr; 474 475 DLD_CLRQFULL(dsp); 476 } 477 478 void 479 dld_init_ops(struct dev_ops *ops, const char *name) 480 { 481 struct streamtab *stream; 482 struct qinit *rq, *wq; 483 struct module_info *modinfo; 484 485 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 486 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 487 (void) snprintf(modinfo->mi_idname, 
FMNAMESZ, "%s", name); 488 modinfo->mi_minpsz = 0; 489 modinfo->mi_maxpsz = 64*1024; 490 modinfo->mi_hiwat = 1; 491 modinfo->mi_lowat = 0; 492 493 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 494 rq->qi_qopen = dld_open; 495 rq->qi_qclose = dld_close; 496 rq->qi_minfo = modinfo; 497 498 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 499 wq->qi_putp = (pfi_t)dld_wput; 500 wq->qi_srvp = (pfi_t)dld_wsrv; 501 wq->qi_minfo = modinfo; 502 503 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 504 stream->st_rdinit = rq; 505 stream->st_wrinit = wq; 506 ops->devo_cb_ops->cb_str = stream; 507 508 if (ops->devo_getinfo == NULL) 509 ops->devo_getinfo = &dld_getinfo; 510 } 511 512 void 513 dld_fini_ops(struct dev_ops *ops) 514 { 515 struct streamtab *stream; 516 struct qinit *rq, *wq; 517 struct module_info *modinfo; 518 519 stream = ops->devo_cb_ops->cb_str; 520 rq = stream->st_rdinit; 521 wq = stream->st_wrinit; 522 modinfo = rq->qi_minfo; 523 ASSERT(wq->qi_minfo == modinfo); 524 525 kmem_free(stream, sizeof (struct streamtab)); 526 kmem_free(wq, sizeof (struct qinit)); 527 kmem_free(rq, sizeof (struct qinit)); 528 kmem_free(modinfo->mi_idname, FMNAMESZ); 529 kmem_free(modinfo, sizeof (struct module_info)); 530 } 531 532 /* 533 * Initialize this module's data structures. 534 */ 535 void 536 dld_str_init(void) 537 { 538 /* 539 * Create dld_str_t object cache. 540 */ 541 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 542 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 543 ASSERT(str_cachep != NULL); 544 545 /* 546 * Create a hash table for maintaining dld_str_t's. 547 * The ds_minor field (the clone minor number) of a dld_str_t 548 * is used as a key for this hash table because this number is 549 * globally unique (allocated from "dls_minor_arena"). 
550 */ 551 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 552 mod_hash_null_valdtor); 553 554 mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL); 555 cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL); 556 557 dld_taskq_quit = B_FALSE; 558 dld_taskq_done = B_FALSE; 559 list_create(&dld_taskq_list, sizeof (dld_str_t), 560 offsetof(dld_str_t, ds_tqlist)); 561 (void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0, 562 &p0, TS_RUN, minclsyspri); 563 } 564 565 /* 566 * Tear down this module's data structures. 567 */ 568 int 569 dld_str_fini(void) 570 { 571 /* 572 * Make sure that there are no objects in use. 573 */ 574 if (str_count != 0) 575 return (EBUSY); 576 577 /* 578 * Ask the dld_taskq thread to quit and wait for it to be done 579 */ 580 mutex_enter(&dld_taskq_lock); 581 dld_taskq_quit = B_TRUE; 582 cv_signal(&dld_taskq_cv); 583 while (!dld_taskq_done) 584 cv_wait(&dld_taskq_cv, &dld_taskq_lock); 585 mutex_exit(&dld_taskq_lock); 586 list_destroy(&dld_taskq_list); 587 /* 588 * Destroy object cache. 589 */ 590 kmem_cache_destroy(str_cachep); 591 mod_hash_destroy_idhash(str_hashp); 592 return (0); 593 } 594 595 /* 596 * Create a new dld_str_t object. 597 */ 598 dld_str_t * 599 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 600 { 601 dld_str_t *dsp; 602 int err; 603 604 /* 605 * Allocate an object from the cache. 606 */ 607 atomic_add_32(&str_count, 1); 608 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 609 610 /* 611 * Allocate the dummy mblk for flow-control. 612 */ 613 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 614 if (dsp->ds_tx_flow_mp == NULL) { 615 kmem_cache_free(str_cachep, dsp); 616 atomic_add_32(&str_count, -1); 617 return (NULL); 618 } 619 dsp->ds_type = type; 620 dsp->ds_major = major; 621 dsp->ds_style = style; 622 623 /* 624 * Initialize the queue pointers. 
625 */ 626 ASSERT(RD(rq) == rq); 627 dsp->ds_rq = rq; 628 dsp->ds_wq = WR(rq); 629 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 630 631 /* 632 * We want explicit control over our write-side STREAMS queue 633 * where the dummy mblk gets added/removed for flow-control. 634 */ 635 noenable(WR(rq)); 636 637 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor), 638 (mod_hash_val_t)dsp); 639 ASSERT(err == 0); 640 return (dsp); 641 } 642 643 /* 644 * Destroy a dld_str_t object. 645 */ 646 void 647 dld_str_destroy(dld_str_t *dsp) 648 { 649 queue_t *rq; 650 queue_t *wq; 651 mod_hash_val_t val; 652 653 /* 654 * Clear the queue pointers. 655 */ 656 rq = dsp->ds_rq; 657 wq = dsp->ds_wq; 658 ASSERT(wq == WR(rq)); 659 rq->q_ptr = wq->q_ptr = NULL; 660 dsp->ds_rq = dsp->ds_wq = NULL; 661 662 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 663 ASSERT(dsp->ds_sap == 0); 664 ASSERT(dsp->ds_mh == NULL); 665 ASSERT(dsp->ds_mch == NULL); 666 ASSERT(dsp->ds_promisc == 0); 667 ASSERT(dsp->ds_mph == NULL); 668 ASSERT(dsp->ds_mip == NULL); 669 ASSERT(dsp->ds_mnh == NULL); 670 671 ASSERT(dsp->ds_polling == B_FALSE); 672 ASSERT(dsp->ds_direct == B_FALSE); 673 ASSERT(dsp->ds_lso == B_FALSE); 674 ASSERT(dsp->ds_lso_max == 0); 675 ASSERT(dsp->ds_passivestate != DLD_ACTIVE); 676 677 /* 678 * Reinitialize all the flags. 679 */ 680 dsp->ds_notifications = 0; 681 dsp->ds_passivestate = DLD_UNINITIALIZED; 682 dsp->ds_mode = DLD_UNITDATA; 683 dsp->ds_native = B_FALSE; 684 dsp->ds_nonip = B_FALSE; 685 686 ASSERT(dsp->ds_datathr_cnt == 0); 687 ASSERT(dsp->ds_pending_head == NULL); 688 ASSERT(dsp->ds_pending_tail == NULL); 689 ASSERT(!dsp->ds_dlpi_pending); 690 691 ASSERT(dsp->ds_dlp == NULL); 692 ASSERT(dsp->ds_dmap == NULL); 693 ASSERT(dsp->ds_rx == NULL); 694 ASSERT(dsp->ds_rx_arg == NULL); 695 ASSERT(dsp->ds_next == NULL); 696 ASSERT(dsp->ds_head == NULL); 697 698 /* 699 * Free the dummy mblk if exists. 
700 */ 701 if (dsp->ds_tx_flow_mp != NULL) { 702 freeb(dsp->ds_tx_flow_mp); 703 dsp->ds_tx_flow_mp = NULL; 704 } 705 706 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val); 707 ASSERT(dsp == (dld_str_t *)val); 708 709 /* 710 * Free the object back to the cache. 711 */ 712 kmem_cache_free(str_cachep, dsp); 713 atomic_add_32(&str_count, -1); 714 } 715 716 /* 717 * kmem_cache contructor function: see kmem_cache_create(9f). 718 */ 719 /*ARGSUSED*/ 720 static int 721 str_constructor(void *buf, void *cdrarg, int kmflags) 722 { 723 dld_str_t *dsp = buf; 724 725 bzero(buf, sizeof (dld_str_t)); 726 727 /* 728 * Allocate a new minor number. 729 */ 730 if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0) 731 return (-1); 732 733 /* 734 * Initialize the DLPI state machine. 735 */ 736 dsp->ds_dlstate = DL_UNATTACHED; 737 738 mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL); 739 cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL); 740 cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL); 741 742 return (0); 743 } 744 745 /* 746 * kmem_cache destructor function. 747 */ 748 /*ARGSUSED*/ 749 static void 750 str_destructor(void *buf, void *cdrarg) 751 { 752 dld_str_t *dsp = buf; 753 754 /* 755 * Release the minor number. 756 */ 757 mac_minor_rele(dsp->ds_minor); 758 759 ASSERT(dsp->ds_tx_flow_mp == NULL); 760 761 mutex_destroy(&dsp->ds_lock); 762 cv_destroy(&dsp->ds_datathr_cv); 763 cv_destroy(&dsp->ds_dlpi_pending_cv); 764 } 765 766 /* 767 * Update the priority bits and VID (may need to insert tag if mp points 768 * to an untagged packet. 769 * If vid is VLAN_ID_NONE, use the VID encoded in the packet. 
770 */ 771 static mblk_t * 772 i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid, 773 link_tagmode_t tagmode) 774 { 775 mblk_t *hmp; 776 struct ether_vlan_header *evhp; 777 struct ether_header *ehp; 778 uint16_t old_tci = 0; 779 size_t len; 780 781 ASSERT(pri != 0 || vid != VLAN_ID_NONE); 782 783 evhp = (struct ether_vlan_header *)mp->b_rptr; 784 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 785 /* 786 * Tagged packet, update the priority bits. 787 */ 788 len = sizeof (struct ether_vlan_header); 789 790 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) { 791 /* 792 * In case some drivers only check the db_ref 793 * count of the first mblk, we pullup the 794 * message into a single mblk. 795 */ 796 hmp = msgpullup(mp, -1); 797 if ((hmp == NULL) || (MBLKL(hmp) < len)) { 798 freemsg(hmp); 799 return (NULL); 800 } else { 801 freemsg(mp); 802 mp = hmp; 803 } 804 } 805 806 evhp = (struct ether_vlan_header *)mp->b_rptr; 807 old_tci = ntohs(evhp->ether_tci); 808 } else { 809 /* 810 * Untagged packet. Two factors will cause us to insert a 811 * VLAN header: 812 * - This is a VLAN link (vid is specified) 813 * - The link supports user priority tagging and the priority 814 * is non-zero. 815 */ 816 if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY) 817 return (mp); 818 819 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 820 if (hmp == NULL) 821 return (NULL); 822 823 evhp = (struct ether_vlan_header *)hmp->b_rptr; 824 ehp = (struct ether_header *)mp->b_rptr; 825 826 /* 827 * Copy the MAC addresses and typelen 828 */ 829 bcopy(ehp, evhp, (ETHERADDRL * 2)); 830 evhp->ether_type = ehp->ether_type; 831 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 832 833 hmp->b_wptr += sizeof (struct ether_vlan_header); 834 mp->b_rptr += sizeof (struct ether_header); 835 836 /* 837 * Free the original message if it's now empty. Link the 838 * rest of the messages to the header message. 
839 */ 840 if (MBLKL(mp) == 0) { 841 hmp->b_cont = mp->b_cont; 842 freeb(mp); 843 } else { 844 hmp->b_cont = mp; 845 } 846 mp = hmp; 847 } 848 849 if (pri == 0) 850 pri = VLAN_PRI(old_tci); 851 if (vid == VLAN_ID_NONE) 852 vid = VLAN_ID(old_tci); 853 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid)); 854 return (mp); 855 } 856 857 /* 858 * M_DATA put (IP fast-path mode) 859 */ 860 mac_tx_cookie_t 861 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint, 862 uint16_t flag) 863 { 864 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 865 mblk_t *newmp; 866 uint_t pri; 867 mac_tx_cookie_t cookie; 868 869 if (is_ethernet) { 870 /* 871 * Update the priority bits to the assigned priority. 872 */ 873 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp); 874 875 if (pri != 0) { 876 newmp = i_dld_ether_header_update_tag(mp, pri, 877 VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode); 878 if (newmp == NULL) 879 goto discard; 880 mp = newmp; 881 } 882 } 883 884 if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != NULL) { 885 DLD_SETQFULL(dsp); 886 } 887 return (cookie); 888 889 discard: 890 /* TODO: bump kstat? */ 891 freemsg(mp); 892 return (NULL); 893 } 894 895 /* 896 * M_DATA put (DLIOCRAW mode) 897 */ 898 static void 899 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 900 { 901 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 902 mblk_t *bp, *newmp; 903 size_t size; 904 mac_header_info_t mhi; 905 uint_t pri, vid, dvid; 906 uint_t max_sdu; 907 908 /* 909 * Certain MAC type plugins provide an illusion for raw DLPI 910 * consumers. They pretend that the MAC layer is something that 911 * it's not for the benefit of observability tools. For example, 912 * mac_wifi pretends that it's Ethernet for such consumers. 913 * Here, unless native mode is enabled, we call into the MAC layer so 914 * that this illusion can be maintained. The plugin will optionally 915 * transform the MAC header here into something that can be passed 916 * down. 
The header goes from raw mode to "cooked" mode. 917 */ 918 if (!dsp->ds_native) { 919 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL) 920 goto discard; 921 mp = newmp; 922 } 923 924 size = MBLKL(mp); 925 926 /* 927 * Check the packet is not too big and that any remaining 928 * fragment list is composed entirely of M_DATA messages. (We 929 * know the first fragment was M_DATA otherwise we could not 930 * have got here). 931 */ 932 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 933 if (DB_TYPE(bp) != M_DATA) 934 goto discard; 935 size += MBLKL(bp); 936 } 937 938 if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0) 939 goto discard; 940 941 mac_sdu_get(dsp->ds_mh, NULL, &max_sdu); 942 /* 943 * If LSO is enabled, check the size against lso_max. Otherwise, 944 * compare the packet size with max_sdu. 945 */ 946 max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu; 947 if (size > max_sdu + mhi.mhi_hdrsize) 948 goto discard; 949 950 if (is_ethernet) { 951 dvid = mac_client_vid(dsp->ds_mch); 952 953 /* 954 * Discard the packet if this is a VLAN stream but the VID in 955 * the packet is not correct. 956 */ 957 vid = VLAN_ID(mhi.mhi_tci); 958 if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE)) 959 goto discard; 960 961 /* 962 * Discard the packet if this packet is a tagged packet 963 * but both pri and VID are 0. 964 */ 965 pri = VLAN_PRI(mhi.mhi_tci); 966 if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 && 967 vid == VLAN_ID_NONE) 968 goto discard; 969 970 /* 971 * Update the priority bits to the per-stream priority if 972 * priority is not set in the packet. Update the VID for 973 * packets on a VLAN stream. 974 */ 975 pri = (pri == 0) ? 
dsp->ds_pri : 0; 976 if ((pri != 0) || (dvid != VLAN_ID_NONE)) { 977 if ((newmp = i_dld_ether_header_update_tag(mp, pri, 978 dvid, dsp->ds_dlp->dl_tagmode)) == NULL) { 979 goto discard; 980 } 981 mp = newmp; 982 } 983 } 984 985 if (DLD_TX(dsp, mp, 0, 0) != NULL) { 986 /* Turn on flow-control for dld */ 987 DLD_SETQFULL(dsp); 988 } 989 return; 990 991 discard: 992 /* TODO: bump kstat? */ 993 freemsg(mp); 994 } 995 996 /* 997 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 998 */ 999 int 1000 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 1001 { 1002 dev_t dev; 1003 int err; 1004 const char *drvname; 1005 mac_perim_handle_t mph = NULL; 1006 boolean_t qassociated = B_FALSE; 1007 dls_link_t *dlp = NULL; 1008 dls_dl_handle_t ddp = NULL; 1009 1010 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 1011 return (EINVAL); 1012 1013 if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA) 1014 return (ENOTSUP); 1015 1016 /* 1017 * /dev node access. This will still be supported for backward 1018 * compatibility reason. 1019 */ 1020 if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) && 1021 (strcmp(drvname, "vnic") != 0)) { 1022 if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 1023 return (EINVAL); 1024 qassociated = B_TRUE; 1025 } 1026 1027 dev = makedevice(dsp->ds_major, (minor_t)ppa + 1); 1028 if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0) 1029 goto failed; 1030 1031 if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0) 1032 goto failed; 1033 1034 /* 1035 * Open a channel. 1036 */ 1037 if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0) 1038 goto failed; 1039 1040 if ((err = dls_open(dlp, ddp, dsp)) != 0) 1041 goto failed; 1042 1043 /* 1044 * Set the default packet priority. 1045 */ 1046 dsp->ds_pri = 0; 1047 1048 /* 1049 * Add a notify function so that the we get updates from the MAC. 
1050 */ 1051 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp); 1052 dsp->ds_dlstate = DL_UNBOUND; 1053 mac_perim_exit(mph); 1054 return (0); 1055 1056 failed: 1057 if (dlp != NULL) 1058 dls_link_rele(dlp); 1059 if (mph != NULL) 1060 mac_perim_exit(mph); 1061 if (ddp != NULL) 1062 dls_devnet_rele(ddp); 1063 if (qassociated) 1064 (void) qassociate(dsp->ds_wq, -1); 1065 1066 return (err); 1067 } 1068 1069 /* 1070 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 1071 * from close(2) for style 2. 1072 */ 1073 void 1074 dld_str_detach(dld_str_t *dsp) 1075 { 1076 mac_perim_handle_t mph; 1077 int err; 1078 1079 ASSERT(dsp->ds_datathr_cnt == 0); 1080 1081 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 1082 /* 1083 * Remove the notify function. 1084 * 1085 * Note that we cannot wait for the notification callback to be removed 1086 * since it could cause the deadlock with str_notify() since they both 1087 * need the mac perimeter. Continue if we cannot remove the 1088 * notification callback right now and wait after we leave the 1089 * perimeter. 1090 */ 1091 err = mac_notify_remove(dsp->ds_mnh, B_FALSE); 1092 dsp->ds_mnh = NULL; 1093 1094 /* 1095 * Disable the capabilities 1096 */ 1097 dld_capabilities_disable(dsp); 1098 1099 /* 1100 * Clear LSO flags. 1101 */ 1102 dsp->ds_lso = B_FALSE; 1103 dsp->ds_lso_max = 0; 1104 1105 dls_close(dsp); 1106 mac_perim_exit(mph); 1107 1108 /* 1109 * Now we leave the mac perimeter. If mac_notify_remove() failed 1110 * because the notification callback was in progress, wait for 1111 * it to finish before we proceed. 1112 */ 1113 if (err != 0) 1114 mac_notify_remove_wait(dsp->ds_mh); 1115 1116 /* 1117 * An unreferenced tagged (non-persistent) vlan gets destroyed 1118 * automatically in the call to dls_devnet_rele. 
1119 */ 1120 dls_devnet_rele(dsp->ds_ddh); 1121 1122 dsp->ds_sap = 0; 1123 dsp->ds_mh = NULL; 1124 dsp->ds_mch = NULL; 1125 dsp->ds_mip = NULL; 1126 1127 if (dsp->ds_style == DL_STYLE2) 1128 (void) qassociate(dsp->ds_wq, -1); 1129 1130 /* 1131 * Re-initialize the DLPI state machine. 1132 */ 1133 dsp->ds_dlstate = DL_UNATTACHED; 1134 } 1135 1136 /* 1137 * This function is only called for VLAN streams. In raw mode, we strip VLAN 1138 * tags before sending packets up to the DLS clients, with the exception of 1139 * special priority tagged packets, in that case, we set the VID to 0. 1140 * mp must be a VLAN tagged packet. 1141 */ 1142 static mblk_t * 1143 i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri) 1144 { 1145 mblk_t *newmp; 1146 struct ether_vlan_header *evhp; 1147 uint16_t tci, new_tci; 1148 1149 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 1150 if (DB_REF(mp) > 1) { 1151 newmp = copymsg(mp); 1152 if (newmp == NULL) 1153 return (NULL); 1154 freemsg(mp); 1155 mp = newmp; 1156 } 1157 evhp = (struct ether_vlan_header *)mp->b_rptr; 1158 1159 tci = ntohs(evhp->ether_tci); 1160 if (VLAN_PRI(tci) == 0 || !keep_pri) { 1161 /* 1162 * Priority is 0, strip the tag. 1163 */ 1164 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 1165 mp->b_rptr += VLAN_TAGSZ; 1166 } else { 1167 /* 1168 * Priority is not 0, update the VID to 0. 1169 */ 1170 new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE); 1171 evhp->ether_tci = htons(new_tci); 1172 } 1173 return (mp); 1174 } 1175 1176 /* 1177 * Raw mode receive function. 
1178 */ 1179 /*ARGSUSED*/ 1180 void 1181 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1182 mac_header_info_t *mhip) 1183 { 1184 dld_str_t *dsp = (dld_str_t *)arg; 1185 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 1186 mblk_t *next, *newmp; 1187 1188 ASSERT(mp != NULL); 1189 do { 1190 /* 1191 * Get the pointer to the next packet in the chain and then 1192 * clear b_next before the packet gets passed on. 1193 */ 1194 next = mp->b_next; 1195 mp->b_next = NULL; 1196 1197 /* 1198 * Wind back b_rptr to point at the MAC header. 1199 */ 1200 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1201 mp->b_rptr -= mhip->mhi_hdrsize; 1202 1203 /* 1204 * Certain MAC type plugins provide an illusion for raw 1205 * DLPI consumers. They pretend that the MAC layer is 1206 * something that it's not for the benefit of observability 1207 * tools. For example, mac_wifi pretends that it's Ethernet 1208 * for such consumers. Here, unless native mode is enabled, 1209 * we call into the MAC layer so that this illusion can be 1210 * maintained. The plugin will optionally transform the MAC 1211 * header here into something that can be passed up to raw 1212 * consumers. The header goes from "cooked" mode to raw mode. 1213 */ 1214 if (!dsp->ds_native) { 1215 newmp = mac_header_uncook(dsp->ds_mh, mp); 1216 if (newmp == NULL) { 1217 freemsg(mp); 1218 goto next; 1219 } 1220 mp = newmp; 1221 } 1222 1223 /* 1224 * Strip the VLAN tag for VLAN streams. 1225 */ 1226 if (is_ethernet && 1227 mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) { 1228 /* 1229 * The priority should be kept only for VLAN 1230 * data-links. 1231 */ 1232 newmp = i_dld_ether_header_strip_tag(mp, 1233 mac_client_is_vlan_vnic(dsp->ds_mch)); 1234 if (newmp == NULL) { 1235 freemsg(mp); 1236 goto next; 1237 } 1238 mp = newmp; 1239 } 1240 1241 /* 1242 * Pass the packet on. 
1243 */ 1244 if (canputnext(dsp->ds_rq)) 1245 putnext(dsp->ds_rq, mp); 1246 else 1247 freemsg(mp); 1248 1249 next: 1250 /* 1251 * Move on to the next packet in the chain. 1252 */ 1253 mp = next; 1254 } while (mp != NULL); 1255 } 1256 1257 /* 1258 * Fast-path receive function. 1259 */ 1260 /*ARGSUSED*/ 1261 void 1262 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1263 mac_header_info_t *mhip) 1264 { 1265 dld_str_t *dsp = (dld_str_t *)arg; 1266 mblk_t *next; 1267 size_t offset = 0; 1268 1269 /* 1270 * MAC header stripping rules: 1271 * - Tagged packets: 1272 * a. VLAN streams. Strip the whole VLAN header including the tag. 1273 * b. Physical streams 1274 * - VLAN packets (non-zero VID). The stream must be either a 1275 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener. 1276 * Strip the Ethernet header but keep the VLAN header. 1277 * - Special tagged packets (zero VID) 1278 * * The stream is either a DL_PROMISC_SAP listener or a 1279 * ETHERTYPE_VLAN listener, strip the Ethernet header but 1280 * keep the VLAN header. 1281 * * Otherwise, strip the whole VLAN header. 1282 * - Untagged packets. Strip the whole MAC header. 1283 */ 1284 if (mhip->mhi_istagged && 1285 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) && 1286 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1287 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1288 offset = VLAN_TAGSZ; 1289 } 1290 1291 ASSERT(mp != NULL); 1292 do { 1293 /* 1294 * Get the pointer to the next packet in the chain and then 1295 * clear b_next before the packet gets passed on. 1296 */ 1297 next = mp->b_next; 1298 mp->b_next = NULL; 1299 1300 /* 1301 * Wind back b_rptr to point at the VLAN header. 1302 */ 1303 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset); 1304 mp->b_rptr -= offset; 1305 1306 /* 1307 * Pass the packet on. 1308 */ 1309 if (canputnext(dsp->ds_rq)) 1310 putnext(dsp->ds_rq, mp); 1311 else 1312 freemsg(mp); 1313 /* 1314 * Move on to the next packet in the chain. 
1315 */ 1316 mp = next; 1317 } while (mp != NULL); 1318 } 1319 1320 /* 1321 * Default receive function (send DL_UNITDATA_IND messages). 1322 */ 1323 /*ARGSUSED*/ 1324 void 1325 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1326 mac_header_info_t *mhip) 1327 { 1328 dld_str_t *dsp = (dld_str_t *)arg; 1329 mblk_t *ud_mp; 1330 mblk_t *next; 1331 size_t offset = 0; 1332 boolean_t strip_vlan = B_TRUE; 1333 1334 /* 1335 * See MAC header stripping rules in the dld_str_rx_fastpath() function. 1336 */ 1337 if (mhip->mhi_istagged && 1338 (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) && 1339 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1340 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1341 offset = VLAN_TAGSZ; 1342 strip_vlan = B_FALSE; 1343 } 1344 1345 ASSERT(mp != NULL); 1346 do { 1347 /* 1348 * Get the pointer to the next packet in the chain and then 1349 * clear b_next before the packet gets passed on. 1350 */ 1351 next = mp->b_next; 1352 mp->b_next = NULL; 1353 1354 /* 1355 * Wind back b_rptr to point at the MAC header. 1356 */ 1357 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1358 mp->b_rptr -= mhip->mhi_hdrsize; 1359 1360 /* 1361 * Create the DL_UNITDATA_IND M_PROTO. 1362 */ 1363 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) { 1364 freemsgchain(mp); 1365 return; 1366 } 1367 1368 /* 1369 * Advance b_rptr to point at the payload (or the VLAN header). 1370 */ 1371 mp->b_rptr += (mhip->mhi_hdrsize - offset); 1372 1373 /* 1374 * Prepend the DL_UNITDATA_IND. 1375 */ 1376 ud_mp->b_cont = mp; 1377 1378 /* 1379 * Send the message. 1380 */ 1381 if (canputnext(dsp->ds_rq)) 1382 putnext(dsp->ds_rq, ud_mp); 1383 else 1384 freemsg(ud_mp); 1385 1386 /* 1387 * Move on to the next packet in the chain. 
1388 */ 1389 mp = next; 1390 } while (mp != NULL); 1391 } 1392 1393 /* 1394 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE 1395 */ 1396 static void 1397 str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu, uint_t multicast_sdu) 1398 { 1399 mblk_t *mp; 1400 dl_notify_ind_t *dlip; 1401 1402 if (!(dsp->ds_notifications & (DL_NOTE_SDU_SIZE|DL_NOTE_SDU_SIZE2))) 1403 return; 1404 1405 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1406 M_PROTO, 0)) == NULL) 1407 return; 1408 1409 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1410 dlip = (dl_notify_ind_t *)mp->b_rptr; 1411 dlip->dl_primitive = DL_NOTIFY_IND; 1412 if (dsp->ds_notifications & DL_NOTE_SDU_SIZE2) { 1413 dlip->dl_notification = DL_NOTE_SDU_SIZE2; 1414 dlip->dl_data1 = max_sdu; 1415 dlip->dl_data2 = multicast_sdu; 1416 } else { 1417 dlip->dl_notification = DL_NOTE_SDU_SIZE; 1418 dlip->dl_data = max_sdu; 1419 } 1420 1421 qreply(dsp->ds_wq, mp); 1422 } 1423 1424 /* 1425 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1426 * current state of the interface. 1427 */ 1428 void 1429 dld_str_notify_ind(dld_str_t *dsp) 1430 { 1431 mac_notify_type_t type; 1432 1433 for (type = 0; type < MAC_NNOTE; type++) 1434 str_notify(dsp, type); 1435 } 1436 1437 typedef struct dl_unitdata_ind_wrapper { 1438 dl_unitdata_ind_t dl_unitdata; 1439 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1440 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1441 } dl_unitdata_ind_wrapper_t; 1442 1443 /* 1444 * Create a DL_UNITDATA_IND M_PROTO message. 1445 */ 1446 static mblk_t * 1447 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan) 1448 { 1449 mblk_t *nmp; 1450 dl_unitdata_ind_wrapper_t *dlwp; 1451 dl_unitdata_ind_t *dlp; 1452 mac_header_info_t mhi; 1453 uint_t addr_length; 1454 uint8_t *daddr; 1455 uint8_t *saddr; 1456 1457 /* 1458 * Get the packet header information. 
1459 */ 1460 if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0) 1461 return (NULL); 1462 1463 /* 1464 * Allocate a message large enough to contain the wrapper structure 1465 * defined above. 1466 */ 1467 if ((nmp = mexchange(dsp->ds_wq, NULL, 1468 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1469 DL_UNITDATA_IND)) == NULL) 1470 return (NULL); 1471 1472 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1473 1474 dlp = &(dlwp->dl_unitdata); 1475 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1476 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1477 1478 /* 1479 * Copy in the destination address. 1480 */ 1481 addr_length = dsp->ds_mip->mi_addr_length; 1482 daddr = dlwp->dl_dest_addr; 1483 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1484 bcopy(mhi.mhi_daddr, daddr, addr_length); 1485 1486 /* 1487 * Set the destination DLSAP to the SAP value encoded in the packet. 1488 */ 1489 if (mhi.mhi_istagged && !strip_vlan) 1490 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN; 1491 else 1492 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap; 1493 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1494 1495 /* 1496 * If the destination address was multicast or broadcast then the 1497 * dl_group_address field should be non-zero. 1498 */ 1499 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) || 1500 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST); 1501 1502 /* 1503 * Copy in the source address if one exists. Some MAC types (DL_IB 1504 * for example) may not have access to source information. 1505 */ 1506 if (mhi.mhi_saddr == NULL) { 1507 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0; 1508 } else { 1509 saddr = dlwp->dl_src_addr; 1510 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1511 bcopy(mhi.mhi_saddr, saddr, addr_length); 1512 1513 /* 1514 * Set the source DLSAP to the packet ethertype. 
1515 */ 1516 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap; 1517 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1518 } 1519 1520 return (nmp); 1521 } 1522 1523 /* 1524 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1525 */ 1526 static void 1527 str_notify_promisc_on_phys(dld_str_t *dsp) 1528 { 1529 mblk_t *mp; 1530 dl_notify_ind_t *dlip; 1531 1532 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1533 return; 1534 1535 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1536 M_PROTO, 0)) == NULL) 1537 return; 1538 1539 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1540 dlip = (dl_notify_ind_t *)mp->b_rptr; 1541 dlip->dl_primitive = DL_NOTIFY_IND; 1542 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1543 1544 qreply(dsp->ds_wq, mp); 1545 } 1546 1547 /* 1548 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1549 */ 1550 static void 1551 str_notify_promisc_off_phys(dld_str_t *dsp) 1552 { 1553 mblk_t *mp; 1554 dl_notify_ind_t *dlip; 1555 1556 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1557 return; 1558 1559 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1560 M_PROTO, 0)) == NULL) 1561 return; 1562 1563 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1564 dlip = (dl_notify_ind_t *)mp->b_rptr; 1565 dlip->dl_primitive = DL_NOTIFY_IND; 1566 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1567 1568 qreply(dsp->ds_wq, mp); 1569 } 1570 1571 /* 1572 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1573 */ 1574 static void 1575 str_notify_phys_addr(dld_str_t *dsp, uint_t addr_type, const uint8_t *addr) 1576 { 1577 mblk_t *mp; 1578 dl_notify_ind_t *dlip; 1579 uint_t addr_length; 1580 uint16_t ethertype; 1581 1582 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1583 return; 1584 1585 addr_length = dsp->ds_mip->mi_addr_length; 1586 if ((mp = mexchange(dsp->ds_wq, NULL, 1587 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1588 M_PROTO, 0)) == NULL) 1589 return; 1590 1591 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1592 dlip = 
(dl_notify_ind_t *)mp->b_rptr; 1593 dlip->dl_primitive = DL_NOTIFY_IND; 1594 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1595 dlip->dl_data = addr_type; 1596 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1597 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1598 1599 bcopy(addr, &dlip[1], addr_length); 1600 1601 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1602 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype; 1603 1604 qreply(dsp->ds_wq, mp); 1605 } 1606 1607 /* 1608 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1609 */ 1610 static void 1611 str_notify_link_up(dld_str_t *dsp) 1612 { 1613 mblk_t *mp; 1614 dl_notify_ind_t *dlip; 1615 1616 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1617 return; 1618 1619 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1620 M_PROTO, 0)) == NULL) 1621 return; 1622 1623 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1624 dlip = (dl_notify_ind_t *)mp->b_rptr; 1625 dlip->dl_primitive = DL_NOTIFY_IND; 1626 dlip->dl_notification = DL_NOTE_LINK_UP; 1627 1628 qreply(dsp->ds_wq, mp); 1629 } 1630 1631 /* 1632 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1633 */ 1634 static void 1635 str_notify_link_down(dld_str_t *dsp) 1636 { 1637 mblk_t *mp; 1638 dl_notify_ind_t *dlip; 1639 1640 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1641 return; 1642 1643 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1644 M_PROTO, 0)) == NULL) 1645 return; 1646 1647 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1648 dlip = (dl_notify_ind_t *)mp->b_rptr; 1649 dlip->dl_primitive = DL_NOTIFY_IND; 1650 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1651 1652 qreply(dsp->ds_wq, mp); 1653 } 1654 1655 /* 1656 * DL_NOTIFY_IND: DL_NOTE_SPEED 1657 */ 1658 static void 1659 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1660 { 1661 mblk_t *mp; 1662 dl_notify_ind_t *dlip; 1663 1664 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1665 return; 1666 1667 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1668 
M_PROTO, 0)) == NULL) 1669 return; 1670 1671 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1672 dlip = (dl_notify_ind_t *)mp->b_rptr; 1673 dlip->dl_primitive = DL_NOTIFY_IND; 1674 dlip->dl_notification = DL_NOTE_SPEED; 1675 dlip->dl_data = speed; 1676 1677 qreply(dsp->ds_wq, mp); 1678 } 1679 1680 /* 1681 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1682 */ 1683 static void 1684 str_notify_capab_reneg(dld_str_t *dsp) 1685 { 1686 mblk_t *mp; 1687 dl_notify_ind_t *dlip; 1688 1689 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1690 return; 1691 1692 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1693 M_PROTO, 0)) == NULL) 1694 return; 1695 1696 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1697 dlip = (dl_notify_ind_t *)mp->b_rptr; 1698 dlip->dl_primitive = DL_NOTIFY_IND; 1699 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1700 1701 qreply(dsp->ds_wq, mp); 1702 } 1703 1704 /* 1705 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH 1706 */ 1707 static void 1708 str_notify_fastpath_flush(dld_str_t *dsp) 1709 { 1710 mblk_t *mp; 1711 dl_notify_ind_t *dlip; 1712 1713 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH)) 1714 return; 1715 1716 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1717 M_PROTO, 0)) == NULL) 1718 return; 1719 1720 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1721 dlip = (dl_notify_ind_t *)mp->b_rptr; 1722 dlip->dl_primitive = DL_NOTIFY_IND; 1723 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH; 1724 1725 qreply(dsp->ds_wq, mp); 1726 } 1727 1728 static void 1729 str_notify_allowed_ips(dld_str_t *dsp) 1730 { 1731 mblk_t *mp; 1732 dl_notify_ind_t *dlip; 1733 size_t mp_size; 1734 mac_protect_t *mrp; 1735 1736 if (!(dsp->ds_notifications & DL_NOTE_ALLOWED_IPS)) 1737 return; 1738 1739 mp_size = sizeof (mac_protect_t) + sizeof (dl_notify_ind_t); 1740 if ((mp = mexchange(dsp->ds_wq, NULL, mp_size, M_PROTO, 0)) == NULL) 1741 return; 1742 1743 mrp = mac_protect_get(dsp->ds_mh); 1744 bzero(mp->b_rptr, mp_size); 1745 dlip = (dl_notify_ind_t 
*)mp->b_rptr; 1746 dlip->dl_primitive = DL_NOTIFY_IND; 1747 dlip->dl_notification = DL_NOTE_ALLOWED_IPS; 1748 dlip->dl_data = 0; 1749 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1750 dlip->dl_addr_length = sizeof (mac_protect_t); 1751 bcopy(mrp, mp->b_rptr + sizeof (dl_notify_ind_t), 1752 sizeof (mac_protect_t)); 1753 1754 qreply(dsp->ds_wq, mp); 1755 } 1756 1757 /* 1758 * MAC notification callback. 1759 */ 1760 void 1761 str_notify(void *arg, mac_notify_type_t type) 1762 { 1763 dld_str_t *dsp = (dld_str_t *)arg; 1764 queue_t *q = dsp->ds_wq; 1765 mac_handle_t mh = dsp->ds_mh; 1766 mac_client_handle_t mch = dsp->ds_mch; 1767 uint8_t addr[MAXMACADDRLEN]; 1768 1769 switch (type) { 1770 case MAC_NOTE_TX: 1771 qenable(q); 1772 break; 1773 1774 case MAC_NOTE_DEVPROMISC: 1775 /* 1776 * Send the appropriate DL_NOTIFY_IND. 1777 */ 1778 if (mac_promisc_get(mh)) 1779 str_notify_promisc_on_phys(dsp); 1780 else 1781 str_notify_promisc_off_phys(dsp); 1782 break; 1783 1784 case MAC_NOTE_UNICST: 1785 /* 1786 * This notification is sent whenever the MAC unicast 1787 * address changes. 1788 */ 1789 mac_unicast_primary_get(mh, addr); 1790 1791 /* 1792 * Send the appropriate DL_NOTIFY_IND. 1793 */ 1794 str_notify_phys_addr(dsp, DL_CURR_PHYS_ADDR, addr); 1795 break; 1796 1797 case MAC_NOTE_DEST: 1798 /* 1799 * Only send up DL_NOTE_DEST_ADDR if the link has a 1800 * destination address. 1801 */ 1802 if (mac_dst_get(dsp->ds_mh, addr)) 1803 str_notify_phys_addr(dsp, DL_CURR_DEST_ADDR, addr); 1804 break; 1805 1806 case MAC_NOTE_LOWLINK: 1807 case MAC_NOTE_LINK: 1808 /* 1809 * LOWLINK refers to the actual link status. For links that 1810 * are not part of a bridge instance LOWLINK and LINK state 1811 * are the same. But for a link part of a bridge instance 1812 * LINK state refers to the aggregate link status: "up" when 1813 * at least one link part of the bridge is up and is "down" 1814 * when all links part of the bridge are down. 
1815 * 1816 * Clients can request to be notified of the LOWLINK state 1817 * using the DLIOCLOWLINK ioctl. Clients such as the bridge 1818 * daemon request lowlink state changes and upper layer clients 1819 * receive notifications of the aggregate link state changes 1820 * which is the default when requesting LINK UP/DOWN state 1821 * notifications. 1822 */ 1823 1824 /* 1825 * Check that the notification type matches the one that we 1826 * want. If we want lower-level link notifications, and this 1827 * is upper, or if we want upper and this is lower, then 1828 * ignore. 1829 */ 1830 if ((type == MAC_NOTE_LOWLINK) != dsp->ds_lowlink) 1831 break; 1832 /* 1833 * This notification is sent every time the MAC driver 1834 * updates the link state. 1835 */ 1836 switch (mac_client_stat_get(mch, dsp->ds_lowlink ? 1837 MAC_STAT_LOWLINK_STATE : MAC_STAT_LINK_STATE)) { 1838 case LINK_STATE_UP: { 1839 uint64_t speed; 1840 /* 1841 * The link is up so send the appropriate 1842 * DL_NOTIFY_IND. 1843 */ 1844 str_notify_link_up(dsp); 1845 1846 speed = mac_stat_get(mh, MAC_STAT_IFSPEED); 1847 str_notify_speed(dsp, (uint32_t)(speed / 1000ull)); 1848 break; 1849 } 1850 case LINK_STATE_DOWN: 1851 /* 1852 * The link is down so send the appropriate 1853 * DL_NOTIFY_IND. 1854 */ 1855 str_notify_link_down(dsp); 1856 break; 1857 1858 default: 1859 break; 1860 } 1861 break; 1862 1863 case MAC_NOTE_CAPAB_CHG: 1864 /* 1865 * This notification is sent whenever the MAC resources 1866 * change or capabilities change. We need to renegotiate 1867 * the capabilities. Send the appropriate DL_NOTIFY_IND. 
1868 */ 1869 str_notify_capab_reneg(dsp); 1870 break; 1871 1872 case MAC_NOTE_SDU_SIZE: { 1873 uint_t max_sdu; 1874 uint_t multicast_sdu; 1875 mac_sdu_get2(dsp->ds_mh, NULL, &max_sdu, &multicast_sdu); 1876 str_notify_sdu_size(dsp, max_sdu, multicast_sdu); 1877 break; 1878 } 1879 1880 case MAC_NOTE_FASTPATH_FLUSH: 1881 str_notify_fastpath_flush(dsp); 1882 break; 1883 1884 /* Unused notifications */ 1885 case MAC_NOTE_MARGIN: 1886 break; 1887 1888 case MAC_NOTE_ALLOWED_IPS: 1889 str_notify_allowed_ips(dsp); 1890 break; 1891 1892 default: 1893 ASSERT(B_FALSE); 1894 break; 1895 } 1896 } 1897 1898 /* 1899 * This function is called via a taskq mechansim to process all control 1900 * messages on a per 'dsp' end point. 1901 */ 1902 static void 1903 dld_wput_nondata_task(void *arg) 1904 { 1905 dld_str_t *dsp = arg; 1906 mblk_t *mp; 1907 1908 mutex_enter(&dsp->ds_lock); 1909 while (dsp->ds_pending_head != NULL) { 1910 mp = dsp->ds_pending_head; 1911 dsp->ds_pending_head = mp->b_next; 1912 mp->b_next = NULL; 1913 if (dsp->ds_pending_head == NULL) 1914 dsp->ds_pending_tail = NULL; 1915 mutex_exit(&dsp->ds_lock); 1916 1917 switch (DB_TYPE(mp)) { 1918 case M_PROTO: 1919 case M_PCPROTO: 1920 dld_proto(dsp, mp); 1921 break; 1922 case M_IOCTL: 1923 dld_ioc(dsp, mp); 1924 break; 1925 default: 1926 ASSERT(0); 1927 } 1928 1929 mutex_enter(&dsp->ds_lock); 1930 } 1931 ASSERT(dsp->ds_pending_tail == NULL); 1932 dsp->ds_dlpi_pending = 0; 1933 cv_broadcast(&dsp->ds_dlpi_pending_cv); 1934 mutex_exit(&dsp->ds_lock); 1935 } 1936 1937 /* 1938 * Kernel thread to handle taskq dispatch failures in dld_wput_data. This 1939 * thread is started at boot time. 
1940 */ 1941 static void 1942 dld_taskq_dispatch(void) 1943 { 1944 callb_cpr_t cprinfo; 1945 dld_str_t *dsp; 1946 1947 CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr, 1948 "dld_taskq_dispatch"); 1949 mutex_enter(&dld_taskq_lock); 1950 1951 while (!dld_taskq_quit) { 1952 dsp = list_head(&dld_taskq_list); 1953 while (dsp != NULL) { 1954 list_remove(&dld_taskq_list, dsp); 1955 mutex_exit(&dld_taskq_lock); 1956 VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task, 1957 dsp, TQ_SLEEP) != 0); 1958 mutex_enter(&dld_taskq_lock); 1959 dsp = list_head(&dld_taskq_list); 1960 } 1961 1962 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1963 cv_wait(&dld_taskq_cv, &dld_taskq_lock); 1964 CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock); 1965 } 1966 1967 dld_taskq_done = B_TRUE; 1968 cv_signal(&dld_taskq_cv); 1969 CALLB_CPR_EXIT(&cprinfo); 1970 thread_exit(); 1971 } 1972 1973 /* 1974 * All control operations are serialized on the 'dsp' and are also funneled 1975 * through a taskq mechanism to ensure that subsequent processing has kernel 1976 * context and can safely use cv_wait. 1977 * 1978 * Mechanisms to handle taskq dispatch failures 1979 * 1980 * The only way to be sure that taskq dispatch does not fail is to either 1981 * specify TQ_SLEEP or to use a static taskq and prepopulate it with 1982 * some number of entries and make sure that the number of outstanding requests 1983 * are less than that number. We can't use TQ_SLEEP since we don't know the 1984 * context. Nor can we bound the total number of 'dsp' end points. So we are 1985 * unable to use either of the above schemes, and are forced to deal with 1986 * taskq dispatch failures. Note that even dynamic taskq could fail in 1987 * dispatch if TQ_NOSLEEP is specified, since this flag is translated 1988 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq 1989 * framework. 1990 * 1991 * We maintain a queue of 'dsp's that encountered taskq dispatch failure. 
1992 * We also have a single global thread to retry the taskq dispatch. This 1993 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but 1994 * uses TQ_SLEEP to ensure eventual success of the dispatch operation. 1995 */ 1996 static void 1997 dld_wput_nondata(dld_str_t *dsp, mblk_t *mp) 1998 { 1999 ASSERT(mp->b_next == NULL); 2000 mutex_enter(&dsp->ds_lock); 2001 if (dsp->ds_pending_head != NULL) { 2002 ASSERT(dsp->ds_dlpi_pending); 2003 dsp->ds_pending_tail->b_next = mp; 2004 dsp->ds_pending_tail = mp; 2005 mutex_exit(&dsp->ds_lock); 2006 return; 2007 } 2008 ASSERT(dsp->ds_pending_tail == NULL); 2009 dsp->ds_pending_head = dsp->ds_pending_tail = mp; 2010 /* 2011 * At this point if ds_dlpi_pending is set, it implies that the taskq 2012 * thread is still active and is processing the last message, though 2013 * the pending queue has been emptied. 2014 */ 2015 if (dsp->ds_dlpi_pending) { 2016 mutex_exit(&dsp->ds_lock); 2017 return; 2018 } 2019 2020 dsp->ds_dlpi_pending = 1; 2021 mutex_exit(&dsp->ds_lock); 2022 2023 if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp, 2024 TQ_NOSLEEP) != 0) 2025 return; 2026 2027 mutex_enter(&dld_taskq_lock); 2028 list_insert_tail(&dld_taskq_list, dsp); 2029 cv_signal(&dld_taskq_cv); 2030 mutex_exit(&dld_taskq_lock); 2031 } 2032 2033 /* 2034 * Process an M_IOCTL message. 
2035 */ 2036 static void 2037 dld_ioc(dld_str_t *dsp, mblk_t *mp) 2038 { 2039 uint_t cmd; 2040 2041 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 2042 ASSERT(dsp->ds_type == DLD_DLPI); 2043 2044 switch (cmd) { 2045 case DLIOCNATIVE: 2046 ioc_native(dsp, mp); 2047 break; 2048 case DLIOCMARGININFO: 2049 ioc_margin(dsp, mp); 2050 break; 2051 case DLIOCRAW: 2052 ioc_raw(dsp, mp); 2053 break; 2054 case DLIOCHDRINFO: 2055 ioc_fast(dsp, mp); 2056 break; 2057 case DLIOCLOWLINK: 2058 ioc_lowlink(dsp, mp); 2059 break; 2060 default: 2061 ioc(dsp, mp); 2062 } 2063 } 2064 2065 /* 2066 * DLIOCNATIVE 2067 */ 2068 static void 2069 ioc_native(dld_str_t *dsp, mblk_t *mp) 2070 { 2071 queue_t *q = dsp->ds_wq; 2072 const mac_info_t *mip = dsp->ds_mip; 2073 2074 /* 2075 * Native mode can be enabled if it's disabled and if the 2076 * native media type is different. 2077 */ 2078 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia) 2079 dsp->ds_native = B_TRUE; 2080 2081 if (dsp->ds_native) 2082 miocack(q, mp, 0, mip->mi_nativemedia); 2083 else 2084 miocnak(q, mp, 0, ENOTSUP); 2085 } 2086 2087 /* 2088 * DLIOCMARGININFO 2089 */ 2090 static void 2091 ioc_margin(dld_str_t *dsp, mblk_t *mp) 2092 { 2093 queue_t *q = dsp->ds_wq; 2094 uint32_t margin; 2095 int err; 2096 2097 if (dsp->ds_dlstate == DL_UNATTACHED) { 2098 err = EINVAL; 2099 goto failed; 2100 } 2101 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0) 2102 goto failed; 2103 2104 mac_margin_get(dsp->ds_mh, &margin); 2105 *((uint32_t *)mp->b_cont->b_rptr) = margin; 2106 miocack(q, mp, sizeof (uint32_t), 0); 2107 return; 2108 2109 failed: 2110 miocnak(q, mp, 0, err); 2111 } 2112 2113 /* 2114 * DLIOCRAW 2115 */ 2116 static void 2117 ioc_raw(dld_str_t *dsp, mblk_t *mp) 2118 { 2119 queue_t *q = dsp->ds_wq; 2120 mac_perim_handle_t mph; 2121 2122 if (dsp->ds_mh == NULL) { 2123 dsp->ds_mode = DLD_RAW; 2124 miocack(q, mp, 0, 0); 2125 return; 2126 } 2127 2128 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 2129 if (dsp->ds_polling || 
dsp->ds_direct) { 2130 mac_perim_exit(mph); 2131 miocnak(q, mp, 0, EPROTO); 2132 return; 2133 } 2134 2135 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 2136 /* 2137 * Set the receive callback. 2138 */ 2139 dls_rx_set(dsp, dld_str_rx_raw, dsp); 2140 } 2141 2142 /* 2143 * Note that raw mode is enabled. 2144 */ 2145 dsp->ds_mode = DLD_RAW; 2146 mac_perim_exit(mph); 2147 2148 miocack(q, mp, 0, 0); 2149 } 2150 2151 /* 2152 * DLIOCHDRINFO 2153 */ 2154 static void 2155 ioc_fast(dld_str_t *dsp, mblk_t *mp) 2156 { 2157 dl_unitdata_req_t *dlp; 2158 off_t off; 2159 size_t len; 2160 const uint8_t *addr; 2161 uint16_t sap; 2162 mblk_t *nmp; 2163 mblk_t *hmp; 2164 uint_t addr_length; 2165 queue_t *q = dsp->ds_wq; 2166 int err; 2167 mac_perim_handle_t mph; 2168 2169 if (dld_opt & DLD_OPT_NO_FASTPATH) { 2170 err = ENOTSUP; 2171 goto failed; 2172 } 2173 2174 /* 2175 * DLIOCHDRINFO should only come from IP. The one initiated from 2176 * user-land should not be allowed. 2177 */ 2178 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) { 2179 err = EINVAL; 2180 goto failed; 2181 } 2182 2183 nmp = mp->b_cont; 2184 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 2185 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 2186 dlp->dl_primitive != DL_UNITDATA_REQ)) { 2187 err = EINVAL; 2188 goto failed; 2189 } 2190 2191 off = dlp->dl_dest_addr_offset; 2192 len = dlp->dl_dest_addr_length; 2193 2194 if (!MBLKIN(nmp, off, len)) { 2195 err = EINVAL; 2196 goto failed; 2197 } 2198 2199 if (dsp->ds_dlstate != DL_IDLE) { 2200 err = ENOTSUP; 2201 goto failed; 2202 } 2203 2204 addr_length = dsp->ds_mip->mi_addr_length; 2205 if (len != addr_length + sizeof (uint16_t)) { 2206 err = EINVAL; 2207 goto failed; 2208 } 2209 2210 addr = nmp->b_rptr + off; 2211 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 2212 2213 if ((hmp = dls_header(dsp, addr, sap, 0, NULL)) == NULL) { 2214 err = ENOMEM; 2215 goto failed; 2216 } 2217 2218 /* 2219 * This ioctl might happen concurrently with a 
direct call to dld_capab 2220 * that tries to enable direct and/or poll capabilities. Since the 2221 * stack does not serialize them, we do so here to avoid mixing 2222 * the callbacks. 2223 */ 2224 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 2225 if (dsp->ds_mode != DLD_FASTPATH) { 2226 /* 2227 * Set the receive callback (unless polling is enabled). 2228 */ 2229 if (!dsp->ds_polling && !dsp->ds_direct) 2230 dls_rx_set(dsp, dld_str_rx_fastpath, dsp); 2231 2232 /* 2233 * Note that fast-path mode is enabled. 2234 */ 2235 dsp->ds_mode = DLD_FASTPATH; 2236 } 2237 mac_perim_exit(mph); 2238 2239 freemsg(nmp->b_cont); 2240 nmp->b_cont = hmp; 2241 2242 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 2243 return; 2244 failed: 2245 miocnak(q, mp, 0, err); 2246 } 2247 2248 /* 2249 * DLIOCLOWLINK: request actual link state changes. When the 2250 * link is part of a bridge instance the client receives actual 2251 * link state changes and not the aggregate link status. Used by 2252 * the bridging daemon (bridged) for proper RSTP operation. 2253 */ 2254 static void 2255 ioc_lowlink(dld_str_t *dsp, mblk_t *mp) 2256 { 2257 queue_t *q = dsp->ds_wq; 2258 int err; 2259 2260 if ((err = miocpullup(mp, sizeof (int))) != 0) { 2261 miocnak(q, mp, 0, err); 2262 } else { 2263 /* LINTED: alignment */ 2264 dsp->ds_lowlink = *(boolean_t *)mp->b_cont->b_rptr; 2265 miocack(q, mp, 0, 0); 2266 } 2267 } 2268 2269 /* 2270 * Catch-all handler. 2271 */ 2272 static void 2273 ioc(dld_str_t *dsp, mblk_t *mp) 2274 { 2275 queue_t *q = dsp->ds_wq; 2276 2277 if (dsp->ds_dlstate == DL_UNATTACHED) { 2278 miocnak(q, mp, 0, EINVAL); 2279 return; 2280 } 2281 mac_ioctl(dsp->ds_mh, q, mp); 2282 } 2283