/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2023 Oxide Computer Company
 */

/*
 * Data-Link Driver
 */

#include <inet/common.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/vlan.h>
#include <sys/dld_impl.h>
#include <sys/cpuvar.h>
#include <sys/callb.h>
#include <sys/list.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_flow.h>

static int str_constructor(void *, void *, int);
static void str_destructor(void *, void *);
static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t);
static void str_notify_promisc_on_phys(dld_str_t *);
static void str_notify_promisc_off_phys(dld_str_t *);
static void str_notify_phys_addr(dld_str_t *, uint_t, const uint8_t *);
static void str_notify_link_up(dld_str_t *);
static void str_notify_link_down(dld_str_t *);
static void str_notify_capab_reneg(dld_str_t *);
static void str_notify_speed(dld_str_t *, uint32_t);

static void ioc_native(dld_str_t *, mblk_t *);
static void ioc_margin(dld_str_t *, mblk_t *);
static void ioc_raw(dld_str_t *, mblk_t *);
static void ioc_fast(dld_str_t *, mblk_t *);
static void ioc_lowlink(dld_str_t *, mblk_t *);
static void ioc(dld_str_t *, mblk_t *);
static void dld_ioc(dld_str_t *, mblk_t *);
static void dld_wput_nondata(dld_str_t *, mblk_t *);

static void str_mdata_raw_put(dld_str_t *, mblk_t *);
static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t,
    link_tagmode_t);
static mblk_t *i_dld_ether_header_strip_tag(mblk_t *, boolean_t);

static uint32_t str_count;
static kmem_cache_t *str_cachep;
static mod_hash_t *str_hashp;

#define	STR_HASHSZ		64
#define	STR_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)(key))

#define	dld_taskq	system_taskq

static kmutex_t dld_taskq_lock;
static kcondvar_t dld_taskq_cv;
static list_t dld_taskq_list;	/* List of dld_str_t */
boolean_t dld_taskq_quit;
boolean_t dld_taskq_done;

static void dld_taskq_dispatch(void);

/*
 * Some notes on entry points, flow-control, queueing.
 *
 * This driver exports the traditional STREAMS put entry point as well as
 * the non-STREAMS fast-path transmit routine which is provided to IP via
 * the DL_CAPAB_POLL negotiation. The put procedure handles all control
 * and data operations, while the fast-path routine deals only with M_DATA
 * fast-path packets. Regardless of the entry point, all outbound packets
 * will end up in DLD_TX(), where they will be delivered to the MAC layer.
 *
 * The transmit logic operates in the following way: All packets coming
 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control
 * happens when the MAC layer indicates the packets couldn't be
 * transmitted due to 1) lack of resources (e.g. running out of
 * descriptors), or 2) reaching the allowed bandwidth limit for this
 * particular flow. The indication comes in the form of a Tx cookie that
 * identifies the blocked ring. In such a case, DLD will place a
 * dummy message on its write-side STREAMS queue so that the queue is
 * marked as "full". Any subsequent packets arriving at the driver will
 * still be sent to the MAC layer, where they are either queued in the Tx
 * SRS or discarded if the queue limit is exceeded. The write-side STREAMS
 * queue gets enabled when the MAC layer notifies DLD through MAC_NOTE_TX.
 * When the write service procedure runs, it will remove the dummy
 * message from the write-side STREAMS queue; in effect this will trigger
 * backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
 * respectively, for the above reasons.
 *
 * All non-data operations, both DLPI and ioctl, are single-threaded on a per
 * dld_str_t endpoint. This is done using a taskq so that the control
 * operation has kernel context and can cv_wait for resources. In addition,
 * all set-type operations that involve mac-level state modification are
 * serialized per mac endpoint using the perimeter mechanism provided by the
 * mac layer. This serializes all mac clients trying to modify a single mac
 * endpoint over the entire sequence of mac calls made by that client as an
 * atomic unit. The mac framework locking is described in mac.c. A critical
 * element is that DLD/DLS does not hold any locks across the mac perimeter.
 *
 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
 * match dev_t. If a stream is found and it is attached, its dev_info_t *
 * is returned. If the mac handle is non-null, it can be safely accessed
 * below. The mac handle won't be freed until the mac_unregister which
 * won't happen until the driver detaches. The DDI framework ensures that
 * the detach won't happen while a getinfo is in progress.
 */
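
/*
 * Illustrative sketch (no new code paths, just the sequence described in
 * the block comment above, in terms of the names used in this file):
 *
 *	if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != 0)
 *		DLD_SETQFULL(dsp);	// park the dummy ds_tx_flow_mp on the
 *					// write-side queue; with q_hiwat == 1
 *					// the queue is now "full" and upstream
 *					// senders back off
 *
 *	// ... later, MAC_NOTE_TX arrives in str_notify(), which qenable()s
 *	// the write queue; dld_wsrv() then runs and calls DLD_CLRQFULL(dsp),
 *	// removing the dummy mblk and back-enabling the stream.
 */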
typedef struct i_dld_str_state_s {
	major_t ds_major;
	minor_t ds_minor;
	int ds_instance;
	dev_info_t *ds_dip;
} i_dld_str_state_t;

/* ARGSUSED */
static uint_t
i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	i_dld_str_state_t *statep = arg;
	dld_str_t *dsp = (dld_str_t *)val;
	mac_handle_t mh;

	if (statep->ds_major != dsp->ds_major)
		return (MH_WALK_CONTINUE);

	ASSERT(statep->ds_minor != 0);
	mh = dsp->ds_mh;

	if (statep->ds_minor == dsp->ds_minor) {
		/*
		 * Clone: a clone minor is unique. We can terminate the
		 * walk if we find a matching stream -- even if we fail
		 * to obtain the devinfo.
		 */
		if (mh != NULL) {
			statep->ds_dip = mac_devinfo_get(mh);
			statep->ds_instance = DLS_MINOR2INST(mac_minor(mh));
		}
		return (MH_WALK_TERMINATE);
	}
	return (MH_WALK_CONTINUE);
}

static dev_info_t *
dld_finddevinfo(dev_t dev)
{
	dev_info_t *dip;
	i_dld_str_state_t state;

	if (getminor(dev) == 0)
		return (NULL);

	/*
	 * See if it's a minor node of a link
	 */
	if ((dip = dls_link_devinfo(dev)) != NULL)
		return (dip);

	state.ds_minor = getminor(dev);
	state.ds_major = getmajor(dev);
	state.ds_dip = NULL;
	state.ds_instance = -1;

	mod_hash_walk(str_hashp, i_dld_str_walker, &state);
	return (state.ds_dip);
}

int
dld_devt_to_instance(dev_t dev)
{
	minor_t minor;
	i_dld_str_state_t state;

	/*
	 * GLDv3 numbers DLPI style 1 nodes as the instance number + 1.
	 * Minor number 0 is reserved for the DLPI style 2 unattached
	 * node.
	 */

	if ((minor = getminor(dev)) == 0)
		return (-1);

	/*
	 * Check for an unopened style 1 node.
	 * Note that this doesn't *necessarily* work for legacy
	 * devices, but this code is only called within the
	 * getinfo(9e) implementation for true GLDv3 devices, so it
	 * doesn't matter.
	 */
	if (minor > 0 && minor <= DLS_MAX_MINOR) {
		return (DLS_MINOR2INST(minor));
	}

	state.ds_minor = getminor(dev);
	state.ds_major = getmajor(dev);
	state.ds_dip = NULL;
	state.ds_instance = -1;

	mod_hash_walk(str_hashp, i_dld_str_walker, &state);
	return (state.ds_instance);
}

/*
 * devo_getinfo: getinfo(9e)
 *
 * NB: This may be called for a provider before the provider's
 * instances are attached. Hence, if a particular provider needs a
 * special mapping (the mac instance != ddi_get_instance()), then it
 * may need to provide its own implementation using the
 * mac_devt_to_instance() function, and translating the returned mac
 * instance to a devinfo instance. For dev_t's where the minor number
 * is too large (i.e. > MAC_MAX_MINOR), the provider can call this
 * function indirectly via the mac_getinfo() function.
 */
/*ARGSUSED*/
int
dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp)
{
	dev_info_t *devinfo;
	minor_t minor = getminor((dev_t)arg);
	int rc = DDI_FAILURE;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*(dev_info_t **)resp = devinfo;
			rc = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		if (minor > 0 && minor <= DLS_MAX_MINOR) {
			*resp = (void *)(uintptr_t)DLS_MINOR2INST(minor);
			rc = DDI_SUCCESS;
		} else if (minor > DLS_MAX_MINOR &&
		    (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) {
			*resp = (void *)(uintptr_t)ddi_get_instance(devinfo);
			rc = DDI_SUCCESS;
		}
		break;
	}
	return (rc);
}

void *
dld_str_private(queue_t *q)
{
	return (((dld_str_t *)(q->q_ptr))->ds_private);
}

int
dld_str_open(queue_t *rq, dev_t *devp, void *private)
{
	dld_str_t *dsp;
	major_t major;
	minor_t minor;
	int err;

	major = getmajor(*devp);
	minor = getminor(*devp);

	/*
	 * Half the 32-bit minor space is reserved for private use by the
	 * driver, so we bail out here with `ENOSTR` to indicate specfs should
	 * retry the open with the driver's character based `open(9E)`.
	 * For a typical STREAMS driver, that would just be `nodev`, which
	 * would simply return `ENODEV`. But a dual-personality device can
	 * choose to implement the character based `open(9E)` for some minor
	 * nodes. A driver wanting a separate STREAMS interface altogether
	 * would presumably have already provided its own `streamtab`.
	 */
	if (minor >= mac_private_minor())
		return (ENOSTR);

	/*
	 * Create a new dld_str_t for the stream. This will grab a new minor
	 * number that will be handed back in the cloned dev_t. Creation may
	 * fail if we can't allocate the dummy mblk used for flow-control.
	 */
	dsp = dld_str_create(rq, DLD_DLPI, major,
	    ((minor == 0) ? DL_STYLE2 : DL_STYLE1));
	if (dsp == NULL)
		return (ENOSR);

	ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
	dsp->ds_private = private;
	if (minor != 0) {
		/*
		 * Style 1 open
		 */
		if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0)
			goto failed;

		ASSERT(dsp->ds_dlstate == DL_UNBOUND);
	} else {
		(void) qassociate(rq, -1);
	}

	/*
	 * Enable the queue srv(9e) routine.
	 */
	qprocson(rq);

	/*
	 * Construct a cloned dev_t to hand back.
	 */
	*devp = makedevice(getmajor(*devp), dsp->ds_minor);
	return (0);

failed:
	dld_str_destroy(dsp);
	return (err);
}

int
dld_str_close(queue_t *rq)
{
	dld_str_t *dsp = rq->q_ptr;

	/*
	 * All modules on top have been popped off. So there can't be any
	 * threads from the top.
	 */
	ASSERT(dsp->ds_datathr_cnt == 0);

	/*
	 * Wait until pending DLPI requests are processed.
	 */
	mutex_enter(&dsp->ds_lock);
	while (dsp->ds_dlpi_pending)
		cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock);
	mutex_exit(&dsp->ds_lock);

	/*
	 * This stream was open to a provider node. Check to see
	 * if it has been cleanly shut down.
	 */
	if (dsp->ds_dlstate != DL_UNATTACHED) {
		/*
		 * The stream is either open to a style 1 provider or
		 * this is not a clean shutdown. Detach from the PPA.
		 * (This is still ok even in the style 1 case).
		 */
		dld_str_detach(dsp);
	}

	dld_str_destroy(dsp);
	return (0);
}

/*
 * qi_qopen: open(9e)
 */
/*ARGSUSED*/
int
dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	if (sflag == MODOPEN)
		return (ENOTSUP);

	/*
	 * This is a cloning driver and therefore each queue should only
	 * ever get opened once.
	 */
	if (rq->q_ptr != NULL)
		return (EBUSY);

	return (dld_str_open(rq, devp, NULL));
}

/*
 * qi_qclose: close(9e)
 */
/* ARGSUSED */
int
dld_close(queue_t *rq, int flags __unused, cred_t *credp __unused)
{
	/*
	 * Disable the queue srv(9e) routine.
	 */
	qprocsoff(rq);

	return (dld_str_close(rq));
}

/*
 * qi_qputp: put(9e)
 */
int
dld_wput(queue_t *wq, mblk_t *mp)
{
	dld_str_t *dsp = (dld_str_t *)wq->q_ptr;
	dld_str_mode_t mode;

	switch (DB_TYPE(mp)) {
	case M_DATA:
		mutex_enter(&dsp->ds_lock);
		mode = dsp->ds_mode;
		if ((dsp->ds_dlstate != DL_IDLE) ||
		    (mode != DLD_FASTPATH && mode != DLD_RAW)) {
			mutex_exit(&dsp->ds_lock);
			freemsg(mp);
			break;
		}

		DLD_DATATHR_INC(dsp);
		mutex_exit(&dsp->ds_lock);
		if (mode == DLD_FASTPATH) {
			if (dsp->ds_mip->mi_media == DL_ETHER &&
			    (MBLKL(mp) < sizeof (struct ether_header))) {
				freemsg(mp);
			} else {
				(void) str_mdata_fastpath_put(dsp, mp, 0, 0);
			}
		} else {
			str_mdata_raw_put(dsp, mp);
		}
		DLD_DATATHR_DCR(dsp);
		break;
	case M_PROTO:
	case M_PCPROTO: {
		t_uscalar_t prim;

		if (MBLKL(mp) < sizeof (t_uscalar_t))
			break;

		prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;

		if (prim == DL_UNITDATA_REQ) {
			proto_unitdata_req(dsp, mp);
		} else {
			dld_wput_nondata(dsp, mp);
		}
		break;
	}

	case M_IOCTL:
		dld_wput_nondata(dsp, mp);
		break;

	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW) {
			DLD_CLRQFULL(dsp);
			*mp->b_rptr &= ~FLUSHW;
		}

		if (*mp->b_rptr & FLUSHR) {
			qreply(wq, mp);
		} else {
			freemsg(mp);
		}
		break;

	default:
		freemsg(mp);
		break;
	}
	return (0);
}

/*
 * qi_srvp: srv(9e)
 */
int
dld_wsrv(queue_t *wq)
{
	dld_str_t *dsp = wq->q_ptr;

	DLD_CLRQFULL(dsp);
	return (0);
}

void
dld_init_ops(struct dev_ops *ops, const char *name)
{
	struct streamtab *stream;
	struct qinit *rq, *wq;
	struct module_info *modinfo;

	modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP);
	modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP);
	(void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name);
	modinfo->mi_minpsz = 0;
	modinfo->mi_maxpsz = 64*1024;
	modinfo->mi_hiwat = 1;
	modinfo->mi_lowat = 0;

	rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
	rq->qi_qopen = dld_open;
	rq->qi_qclose = dld_close;
	rq->qi_minfo = modinfo;

	wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP);
	wq->qi_putp = (pfi_t)dld_wput;
	wq->qi_srvp = (pfi_t)dld_wsrv;
	wq->qi_minfo = modinfo;

	stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP);
	stream->st_rdinit = rq;
	stream->st_wrinit = wq;
	ops->devo_cb_ops->cb_str = stream;

	if (ops->devo_getinfo == NULL)
		ops->devo_getinfo = &dld_getinfo;
}

void
dld_fini_ops(struct dev_ops *ops)
{
	struct streamtab *stream;
	struct qinit *rq, *wq;
	struct module_info *modinfo;

	stream = ops->devo_cb_ops->cb_str;
	rq = stream->st_rdinit;
	wq = stream->st_wrinit;
	modinfo = rq->qi_minfo;
	ASSERT(wq->qi_minfo == modinfo);

	kmem_free(stream, sizeof (struct streamtab));
	kmem_free(wq, sizeof (struct qinit));
	kmem_free(rq, sizeof (struct qinit));
	kmem_free(modinfo->mi_idname, FMNAMESZ);
	kmem_free(modinfo, sizeof (struct module_info));
}
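
/*
 * Usage sketch (an assumption for illustration; not part of this file): a
 * GLDv3 driver normally does not call dld_init_ops()/dld_fini_ops()
 * directly. It goes through mac_init_ops()/mac_fini_ops() from its
 * _init()/_fini(), which hook the dld streamtab into the driver's dev_ops,
 * e.g. for a hypothetical driver "xx":
 *
 *	int
 *	_init(void)
 *	{
 *		int err;
 *
 *		mac_init_ops(&xx_dev_ops, "xx");
 *		if ((err = mod_install(&xx_modlinkage)) != 0)
 *			mac_fini_ops(&xx_dev_ops);
 *		return (err);
 *	}
 */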

/*
 * Initialize this module's data structures.
 */
void
dld_str_init(void)
{
	/*
	 * Create dld_str_t object cache.
	 */
	str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t),
	    0, str_constructor, str_destructor, NULL, NULL, NULL, 0);
	ASSERT(str_cachep != NULL);

	/*
	 * Create a hash table for maintaining dld_str_t's.
	 * The ds_minor field (the clone minor number) of a dld_str_t
	 * is used as a key for this hash table because this number is
	 * globally unique (allocated from "dls_minor_arena").
	 */
	str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ,
	    mod_hash_null_valdtor);

	mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL);

	dld_taskq_quit = B_FALSE;
	dld_taskq_done = B_FALSE;
	list_create(&dld_taskq_list, sizeof (dld_str_t),
	    offsetof(dld_str_t, ds_tqlist));
	(void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0,
	    &p0, TS_RUN, minclsyspri);
}

/*
 * Tear down this module's data structures.
 */
int
dld_str_fini(void)
{
	/*
	 * Make sure that there are no objects in use.
	 */
	if (str_count != 0)
		return (EBUSY);

	/*
	 * Ask the dld_taskq thread to quit and wait for it to be done.
	 */
	mutex_enter(&dld_taskq_lock);
	dld_taskq_quit = B_TRUE;
	cv_signal(&dld_taskq_cv);
	while (!dld_taskq_done)
		cv_wait(&dld_taskq_cv, &dld_taskq_lock);
	mutex_exit(&dld_taskq_lock);
	list_destroy(&dld_taskq_list);
	/*
	 * Destroy object cache.
	 */
	kmem_cache_destroy(str_cachep);
	mod_hash_destroy_idhash(str_hashp);
	return (0);
}

/*
 * Create a new dld_str_t object.
 */
dld_str_t *
dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style)
{
	dld_str_t *dsp;
	int err;

	/*
	 * Allocate an object from the cache.
	 */
	atomic_inc_32(&str_count);
	dsp = kmem_cache_alloc(str_cachep, KM_SLEEP);

	/*
	 * Allocate the dummy mblk for flow-control.
	 */
	dsp->ds_tx_flow_mp = allocb(1, BPRI_HI);
	if (dsp->ds_tx_flow_mp == NULL) {
		kmem_cache_free(str_cachep, dsp);
		atomic_dec_32(&str_count);
		return (NULL);
	}
	dsp->ds_type = type;
	dsp->ds_major = major;
	dsp->ds_style = style;

	/*
	 * Initialize the queue pointers.
	 */
	ASSERT(RD(rq) == rq);
	dsp->ds_rq = rq;
	dsp->ds_wq = WR(rq);
	rq->q_ptr = WR(rq)->q_ptr = (void *)dsp;

	/*
	 * We want explicit control over our write-side STREAMS queue
	 * where the dummy mblk gets added/removed for flow-control.
	 */
	noenable(WR(rq));

	err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor),
	    (mod_hash_val_t)dsp);
	ASSERT(err == 0);
	return (dsp);
}

/*
 * Destroy a dld_str_t object.
 */
void
dld_str_destroy(dld_str_t *dsp)
{
	queue_t *rq;
	queue_t *wq;
	mod_hash_val_t val;

	/*
	 * Clear the queue pointers.
	 */
	rq = dsp->ds_rq;
	wq = dsp->ds_wq;
	ASSERT(wq == WR(rq));
	rq->q_ptr = wq->q_ptr = NULL;
	dsp->ds_rq = dsp->ds_wq = NULL;

	ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
	ASSERT(dsp->ds_sap == 0);
	ASSERT(dsp->ds_mh == NULL);
	ASSERT(dsp->ds_mch == NULL);
	ASSERT(dsp->ds_promisc == 0);
	ASSERT(dsp->ds_mph == NULL);
	ASSERT(dsp->ds_mip == NULL);
	ASSERT(dsp->ds_mnh == NULL);

	ASSERT(dsp->ds_polling == B_FALSE);
	ASSERT(dsp->ds_direct == B_FALSE);
	ASSERT(dsp->ds_lso == B_FALSE);
	ASSERT(dsp->ds_lso_max == 0);
	ASSERT(dsp->ds_passivestate != DLD_ACTIVE);

	/*
	 * Reinitialize all the flags.
	 */
	dsp->ds_notifications = 0;
	dsp->ds_passivestate = DLD_UNINITIALIZED;
	dsp->ds_mode = DLD_UNITDATA;
	dsp->ds_native = B_FALSE;
	dsp->ds_nonip = B_FALSE;

	ASSERT(dsp->ds_datathr_cnt == 0);
	ASSERT(dsp->ds_pending_head == NULL);
	ASSERT(dsp->ds_pending_tail == NULL);
	ASSERT(!dsp->ds_dlpi_pending);

	ASSERT(dsp->ds_dlp == NULL);
	ASSERT(dsp->ds_dmap == NULL);
	ASSERT(dsp->ds_rx == NULL);
	ASSERT(dsp->ds_rx_arg == NULL);
	ASSERT(dsp->ds_next == NULL);
	ASSERT(dsp->ds_head == NULL);

	/*
	 * Free the dummy mblk if it exists.
	 */
	if (dsp->ds_tx_flow_mp != NULL) {
		freeb(dsp->ds_tx_flow_mp);
		dsp->ds_tx_flow_mp = NULL;
	}

	(void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val);
	ASSERT(dsp == (dld_str_t *)val);

	/*
	 * Free the object back to the cache.
	 */
	kmem_cache_free(str_cachep, dsp);
	atomic_dec_32(&str_count);
}

/*
 * kmem_cache constructor function: see kmem_cache_create(9f).
 */
/*ARGSUSED*/
static int
str_constructor(void *buf, void *cdrarg, int kmflags)
{
	dld_str_t *dsp = buf;

	bzero(buf, sizeof (dld_str_t));

	/*
	 * Allocate a new minor number.
	 */
	if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0)
		return (-1);

	/*
	 * Initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;

	mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL);
	cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL);

	return (0);
}

/*
 * kmem_cache destructor function.
 */
/*ARGSUSED*/
static void
str_destructor(void *buf, void *cdrarg)
{
	dld_str_t *dsp = buf;

	/*
	 * Release the minor number.
	 */
	mac_minor_rele(dsp->ds_minor);

	ASSERT(dsp->ds_tx_flow_mp == NULL);

	mutex_destroy(&dsp->ds_lock);
	cv_destroy(&dsp->ds_datathr_cv);
	cv_destroy(&dsp->ds_dlpi_pending_cv);
}

/*
 * Update the priority bits and VID (we may need to insert a tag if mp
 * points to an untagged packet).
 * If vid is VLAN_ID_NONE, use the VID encoded in the packet.
 */
static mblk_t *
i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
    link_tagmode_t tagmode)
{
	mblk_t *hmp;
	struct ether_vlan_header *evhp;
	struct ether_header *ehp;
	uint16_t old_tci = 0;
	size_t len;

	ASSERT(pri != 0 || vid != VLAN_ID_NONE);

	evhp = (struct ether_vlan_header *)mp->b_rptr;
	if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
		/*
		 * Tagged packet, update the priority bits.
		 */
		len = sizeof (struct ether_vlan_header);

		if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) {
			/*
			 * In case some drivers only check the db_ref
			 * count of the first mblk, we pullup the
			 * message into a single mblk.
			 */
			hmp = msgpullup(mp, -1);
			if ((hmp == NULL) || (MBLKL(hmp) < len)) {
				freemsg(hmp);
				return (NULL);
			} else {
				freemsg(mp);
				mp = hmp;
			}
		}

		evhp = (struct ether_vlan_header *)mp->b_rptr;
		old_tci = ntohs(evhp->ether_tci);
	} else {
		/*
		 * Untagged packet. Two factors will cause us to insert a
		 * VLAN header:
		 * - This is a VLAN link (vid is specified)
		 * - The link supports user priority tagging and the priority
		 *   is non-zero.
		 */
		if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY)
			return (mp);

		hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
		if (hmp == NULL)
			return (NULL);

		evhp = (struct ether_vlan_header *)hmp->b_rptr;
		ehp = (struct ether_header *)mp->b_rptr;

		/*
		 * Copy the MAC addresses and typelen
		 */
		bcopy(ehp, evhp, (ETHERADDRL * 2));
		evhp->ether_type = ehp->ether_type;
		evhp->ether_tpid = htons(ETHERTYPE_VLAN);

		hmp->b_wptr += sizeof (struct ether_vlan_header);
		mp->b_rptr += sizeof (struct ether_header);

		/*
		 * Free the original message if it's now empty. Link the
		 * rest of the messages to the header message.
		 */
		if (MBLKL(mp) == 0) {
			hmp->b_cont = mp->b_cont;
			freeb(mp);
		} else {
			hmp->b_cont = mp;
		}
		mp = hmp;
	}

	if (pri == 0)
		pri = VLAN_PRI(old_tci);
	if (vid == VLAN_ID_NONE)
		vid = VLAN_ID(old_tci);
	evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid));
	return (mp);
}
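
/*
 * For reference, the two Ethernet header layouts that the function above
 * converts between (standard 802.1Q framing; byte offsets only, no new
 * code paths):
 *
 *	untagged: dst[6] src[6] type[2] payload ...
 *	tagged:   dst[6] src[6] tpid[2]=0x8100 tci[2] type[2] payload ...
 *
 * where tci packs pri (3 bits), cfi (1 bit) and vid (12 bits), as the
 * VLAN_TCI()/VLAN_PRI()/VLAN_ID() macros assume.
 */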

/*
 * M_DATA put (IP fast-path mode)
 */
mac_tx_cookie_t
str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint,
    uint16_t flag)
{
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *newmp;
	uint_t pri;
	mac_tx_cookie_t cookie;

	if (is_ethernet) {
		/*
		 * Update the priority bits to the assigned priority.
		 */
		pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp);

		if (pri != 0) {
			newmp = i_dld_ether_header_update_tag(mp, pri,
			    VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode);
			if (newmp == NULL)
				goto discard;
			mp = newmp;
		}
	}

	if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != 0) {
		DLD_SETQFULL(dsp);
	}
	return (cookie);

discard:
	/* TODO: bump kstat? */
	freemsg(mp);
	return (0);
}

/*
 * M_DATA put (DLIOCRAW mode)
 */
static void
str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
{
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *bp, *newmp;
	size_t size;
	mac_header_info_t mhi;
	uint_t pri, vid, dvid;
	uint_t max_sdu;

	/*
	 * Certain MAC type plugins provide an illusion for raw DLPI
	 * consumers. They pretend that the MAC layer is something that
	 * it's not for the benefit of observability tools. For example,
	 * mac_wifi pretends that it's Ethernet for such consumers.
	 * Here, unless native mode is enabled, we call into the MAC layer so
	 * that this illusion can be maintained. The plugin will optionally
	 * transform the MAC header here into something that can be passed
	 * down. The header goes from raw mode to "cooked" mode.
	 */
	if (!dsp->ds_native) {
		if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL)
			goto discard;
		mp = newmp;
	}

	size = MBLKL(mp);

	/*
	 * Check that the packet is not too big and that any remaining
	 * fragment list is composed entirely of M_DATA messages. (We
	 * know the first fragment was M_DATA, otherwise we could not
	 * have gotten here.)
	 */
	for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) {
		if (DB_TYPE(bp) != M_DATA)
			goto discard;
		size += MBLKL(bp);
	}

	if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
		goto discard;

	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
	/*
	 * If LSO is enabled, check the size against lso_max. Otherwise,
	 * compare the packet size with max_sdu.
	 */
	max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu;
	if (size > max_sdu + mhi.mhi_hdrsize)
		goto discard;

	if (is_ethernet) {
		dvid = mac_client_vid(dsp->ds_mch);

		/*
		 * Discard the packet if this is a VLAN stream but the VID in
		 * the packet is not correct.
		 */
		vid = VLAN_ID(mhi.mhi_tci);
		if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
			goto discard;

		/*
		 * Discard the packet if this packet is a tagged packet
		 * but both pri and VID are 0.
		 */
		pri = VLAN_PRI(mhi.mhi_tci);
		if (mhi.mhi_istagged && !mhi.mhi_ispvid && pri == 0 &&
		    vid == VLAN_ID_NONE)
			goto discard;

		/*
		 * Update the priority bits to the per-stream priority if
		 * priority is not set in the packet. Update the VID for
		 * packets on a VLAN stream.
		 */
		pri = (pri == 0) ? dsp->ds_pri : 0;
		if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
			if ((newmp = i_dld_ether_header_update_tag(mp, pri,
			    dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
				goto discard;
			}
			mp = newmp;
		}
	}

	if (DLD_TX(dsp, mp, 0, 0) != 0) {
		/* Turn on flow-control for dld */
		DLD_SETQFULL(dsp);
	}
	return;

discard:
	/* TODO: bump kstat? */
	freemsg(mp);
}

/*
 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
 */
int
dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
{
	dev_t dev;
	int err;
	const char *drvname;
	mac_perim_handle_t mph = NULL;
	boolean_t qassociated = B_FALSE;
	dls_link_t *dlp = NULL;
	dls_dl_handle_t ddp = NULL;

	if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
		return (EINVAL);

	if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA)
		return (ENOTSUP);

	/*
	 * /dev node access. This will still be supported for backward
	 * compatibility reasons.
	 */
	if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) &&
	    (strcmp(drvname, "vnic") != 0)) {
		if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
			return (EINVAL);
		qassociated = B_TRUE;
	}

	dev = makedevice(dsp->ds_major, (minor_t)ppa + 1);
	if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0)
		goto failed;

	if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0)
		goto failed;

	/*
	 * Open a channel.
	 */
	if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0)
		goto failed;

	if ((err = dls_open(dlp, ddp, dsp)) != 0)
		goto failed;

	/*
	 * Set the default packet priority.
	 */
	dsp->ds_pri = 0;

	/*
	 * Add a notify function so that we get updates from the MAC.
	 */
	dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp);
	dsp->ds_dlstate = DL_UNBOUND;
	mac_perim_exit(mph);
	return (0);

failed:
	if (dlp != NULL)
		dls_link_rele(dlp);
	if (mph != NULL)
		mac_perim_exit(mph);
	if (ddp != NULL)
		dls_devnet_rele(ddp);
	if (qassociated)
		(void) qassociate(dsp->ds_wq, -1);

	return (err);
}

/*
 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
 * from close(2) for style 2.
 */
void
dld_str_detach(dld_str_t *dsp)
{
	mac_perim_handle_t mph;
	int err;

	ASSERT(dsp->ds_datathr_cnt == 0);

	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	/*
	 * Remove the notify function.
	 *
	 * Note that we cannot wait here for the notification callback to be
	 * removed, since that could deadlock with str_notify(): both need the
	 * mac perimeter. Continue if we cannot remove the notification
	 * callback right now and wait after we leave the perimeter.
	 */
	err = mac_notify_remove(dsp->ds_mnh, B_FALSE);
	dsp->ds_mnh = NULL;

	/*
	 * Disable the capabilities.
	 */
	dld_capabilities_disable(dsp);

	/*
	 * Clear LSO flags.
	 */
	dsp->ds_lso = B_FALSE;
	dsp->ds_lso_max = 0;

	dls_close(dsp);
	mac_perim_exit(mph);

	/*
	 * Now we leave the mac perimeter. If mac_notify_remove() failed
	 * because the notification callback was in progress, wait for
	 * it to finish before we proceed.
	 */
	if (err != 0)
		mac_notify_remove_wait(dsp->ds_mh);

	/*
	 * An unreferenced tagged (non-persistent) vlan gets destroyed
	 * automatically in the call to dls_devnet_rele.
	 */
	dls_devnet_rele(dsp->ds_ddh);

	dsp->ds_sap = 0;
	dsp->ds_mh = NULL;
	dsp->ds_mch = NULL;
	dsp->ds_mip = NULL;

	if (dsp->ds_style == DL_STYLE2)
		(void) qassociate(dsp->ds_wq, -1);

	/*
	 * Re-initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;
}

/*
 * This function is only called for VLAN streams. In raw mode, we strip VLAN
 * tags before sending packets up to the DLS clients, with the exception of
 * special priority-tagged packets, in which case we set the VID to 0.
 * mp must be a VLAN tagged packet.
 */
static mblk_t *
i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri)
{
	mblk_t *newmp;
	struct ether_vlan_header *evhp;
	uint16_t tci, new_tci;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
	if (DB_REF(mp) > 1) {
		newmp = copymsg(mp);
		if (newmp == NULL)
			return (NULL);
		freemsg(mp);
		mp = newmp;
	}
	evhp = (struct ether_vlan_header *)mp->b_rptr;

	tci = ntohs(evhp->ether_tci);
	if (VLAN_PRI(tci) == 0 || !keep_pri) {
		/*
		 * Priority is 0, strip the tag.
		 */
		ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
		mp->b_rptr += VLAN_TAGSZ;
	} else {
		/*
		 * Priority is not 0, update the VID to 0.
		 */
		new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
		evhp->ether_tci = htons(new_tci);
	}
	return (mp);
}

/*
 * Raw mode receive function.
 */
/*ARGSUSED*/
void
dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *next, *newmp;

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the MAC header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
		mp->b_rptr -= mhip->mhi_hdrsize;

		/*
		 * Certain MAC type plugins provide an illusion for raw
		 * DLPI consumers. They pretend that the MAC layer is
		 * something that it's not for the benefit of observability
		 * tools. For example, mac_wifi pretends that it's Ethernet
		 * for such consumers. Here, unless native mode is enabled,
		 * we call into the MAC layer so that this illusion can be
		 * maintained. The plugin will optionally transform the MAC
		 * header here into something that can be passed up to raw
		 * consumers. The header goes from "cooked" mode to raw mode.
		 */
		if (!dsp->ds_native) {
			newmp = mac_header_uncook(dsp->ds_mh, mp);
			if (newmp == NULL) {
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Strip the VLAN tag for VLAN streams.
		 */
		if (is_ethernet &&
		    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) {
			/*
			 * The priority should be kept only for VLAN
			 * data-links.
			 */
			newmp = i_dld_ether_header_strip_tag(mp,
			    mac_client_is_vlan_vnic(dsp->ds_mch));
			if (newmp == NULL) {
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Pass the packet on.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, mp);
		else
			freemsg(mp);

next:
		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * Fast-path receive function.
 */
/*ARGSUSED*/
void
dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	mblk_t *next;
	size_t offset = 0;

	/*
	 * MAC header stripping rules:
	 * - Tagged packets:
	 *   a. VLAN streams. Strip the whole VLAN header including the tag.
	 *   b. Physical streams
	 *      - VLAN packets (non-zero VID). The stream must be either a
	 *        DL_PROMISC_SAP listener or an ETHERTYPE_VLAN listener.
	 *        Strip the Ethernet header but keep the VLAN header.
	 *      - Special tagged packets (zero VID)
	 *        * The stream is either a DL_PROMISC_SAP listener or an
	 *          ETHERTYPE_VLAN listener, strip the Ethernet header but
	 *          keep the VLAN header.
	 *        * Otherwise, strip the whole VLAN header.
	 * - Untagged packets. Strip the whole MAC header.
	 */
	if (mhip->mhi_istagged &&
	    (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
	    ((dsp->ds_sap == ETHERTYPE_VLAN) ||
	    (dsp->ds_promisc & DLS_PROMISC_SAP))) {
		offset = VLAN_TAGSZ;
	}

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the VLAN header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + offset);
		mp->b_rptr -= offset;

		/*
		 * Pass the packet on.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, mp);
		else
			freemsg(mp);
		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * Default receive function (send DL_UNITDATA_IND messages).
 */
/*ARGSUSED*/
void
dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	mblk_t *ud_mp;
	mblk_t *next;
	size_t offset = 0;
	boolean_t strip_vlan = B_TRUE;

	/*
	 * See MAC header stripping rules in the dld_str_rx_fastpath()
	 * function.
	 */
	if (mhip->mhi_istagged &&
	    (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
	    ((dsp->ds_sap == ETHERTYPE_VLAN) ||
	    (dsp->ds_promisc & DLS_PROMISC_SAP))) {
		offset = VLAN_TAGSZ;
		strip_vlan = B_FALSE;
	}

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the MAC header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
		mp->b_rptr -= mhip->mhi_hdrsize;

		/*
		 * Create the DL_UNITDATA_IND M_PROTO.
		 */
		if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) {
			freemsgchain(mp);
			return;
		}

		/*
		 * Advance b_rptr to point at the payload (or the VLAN header).
		 */
		mp->b_rptr += (mhip->mhi_hdrsize - offset);

		/*
		 * Prepend the DL_UNITDATA_IND.
		 */
		ud_mp->b_cont = mp;

		/*
		 * Send the message.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, ud_mp);
		else
			freemsg(ud_mp);

		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE
 */
static void
str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu, uint_t multicast_sdu)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & (DL_NOTE_SDU_SIZE|DL_NOTE_SDU_SIZE2)))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	if (dsp->ds_notifications & DL_NOTE_SDU_SIZE2) {
		dlip->dl_notification = DL_NOTE_SDU_SIZE2;
		dlip->dl_data1 = max_sdu;
		dlip->dl_data2 = multicast_sdu;
	} else {
		dlip->dl_notification = DL_NOTE_SDU_SIZE;
		dlip->dl_data = max_sdu;
	}

	qreply(dsp->ds_wq, mp);
}

/*
 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the
 * current state of the interface.
 */
void
dld_str_notify_ind(dld_str_t *dsp)
{
	mac_notify_type_t type;

	for (type = 0; type < MAC_NNOTE; type++)
		str_notify(dsp, type);
}

typedef struct dl_unitdata_ind_wrapper {
	dl_unitdata_ind_t dl_unitdata;
	uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)];
	uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)];
} dl_unitdata_ind_wrapper_t;

/*
 * Create a DL_UNITDATA_IND M_PROTO message.
 */
static mblk_t *
str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan)
{
	mblk_t *nmp;
	dl_unitdata_ind_wrapper_t *dlwp;
	dl_unitdata_ind_t *dlp;
	mac_header_info_t mhi;
	uint_t addr_length;
	uint8_t *daddr;
	uint8_t *saddr;

	/*
	 * Get the packet header information.
	 */
	if (mac_vlan_header_info(dsp->ds_mh, mp, &mhi) != 0)
		return (NULL);

	/*
	 * Allocate a message large enough to contain the wrapper structure
	 * defined above.
	 */
	if ((nmp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_unitdata_ind_wrapper_t), M_PROTO,
	    DL_UNITDATA_IND)) == NULL)
		return (NULL);

	dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr;

	dlp = &(dlwp->dl_unitdata);
	ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr);
	ASSERT(dlp->dl_primitive == DL_UNITDATA_IND);

	/*
	 * Copy in the destination address.
	 */
	addr_length = dsp->ds_mip->mi_addr_length;
	daddr = dlwp->dl_dest_addr;
	dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp;
	bcopy(mhi.mhi_daddr, daddr, addr_length);

	/*
	 * Set the destination DLSAP to the SAP value encoded in the packet.
	 */
	if (mhi.mhi_istagged && !strip_vlan)
		*(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN;
	else
		*(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap;
	dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t);

	/*
	 * If the destination address was multicast or broadcast then the
	 * dl_group_address field should be non-zero.
	 */
	dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) ||
	    (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST);

	/*
	 * Copy in the source address if one exists. Some MAC types (DL_IB
	 * for example) may not have access to source information.
	 */
	if (mhi.mhi_saddr == NULL) {
		dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0;
	} else {
		saddr = dlwp->dl_src_addr;
		dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp;
		bcopy(mhi.mhi_saddr, saddr, addr_length);

		/*
		 * Set the source DLSAP to the packet ethertype.
		 */
		*(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap;
		dlp->dl_src_addr_length = addr_length + sizeof (uint16_t);
	}

	return (nmp);
}
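
/*
 * For reference, the message handed upstream for each packet in unitdata
 * mode (see dld_str_rx_unitdata() above) is the M_PROTO built here with the
 * original M_DATA chained behind it:
 *
 *	M_PROTO: dl_unitdata_ind_wrapper_t
 *	         (dl_unitdata_ind_t followed by dest/src DLSAP addresses)
 *	  b_cont -> M_DATA: payload, with b_rptr advanced past the MAC
 *	            header (or only up to the VLAN header when it is kept)
 */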

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS
 */
static void
str_notify_promisc_on_phys(dld_str_t *dsp)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS
 */
static void
str_notify_promisc_off_phys(dld_str_t *dsp)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR
 */
static void
str_notify_phys_addr(dld_str_t *dsp, uint_t addr_type, const uint8_t *addr)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;
	uint_t addr_length;
	uint16_t ethertype;

	if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR))
		return;

	addr_length = dsp->ds_mip->mi_addr_length;
	if ((mp = mexchange(dsp->ds_wq, NULL,
	    sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_PHYS_ADDR;
	dlip->dl_data = addr_type;
	dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
	dlip->dl_addr_length = addr_length + sizeof (uint16_t);

	bcopy(addr, &dlip[1], addr_length);

	ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ?
	    0 : dsp->ds_sap;
	*(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_LINK_UP
 */
static void
str_notify_link_up(dld_str_t *dsp)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & DL_NOTE_LINK_UP))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_LINK_UP;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN
 */
static void
str_notify_link_down(dld_str_t *dsp)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_LINK_DOWN;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_SPEED
 */
static void
str_notify_speed(dld_str_t *dsp, uint32_t speed)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & DL_NOTE_SPEED))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_SPEED;
	dlip->dl_data = speed;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG
 */
static void
str_notify_capab_reneg(dld_str_t *dsp)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_CAPAB_RENEG;

	qreply(dsp->ds_wq, mp);
}

/*
 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH
 */
static void
str_notify_fastpath_flush(dld_str_t *dsp)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;

	if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH))
		return;

	if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t),
	    M_PROTO, 0)) == NULL)
		return;

	bzero(mp->b_rptr, sizeof (dl_notify_ind_t));
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH;

	qreply(dsp->ds_wq, mp);
}

static void
str_notify_allowed_ips(dld_str_t *dsp)
{
	mblk_t *mp;
	dl_notify_ind_t *dlip;
	size_t mp_size;
	mac_protect_t *mrp;

	if (!(dsp->ds_notifications & DL_NOTE_ALLOWED_IPS))
		return;

	mp_size = sizeof (mac_protect_t) + sizeof (dl_notify_ind_t);
	if ((mp = mexchange(dsp->ds_wq, NULL, mp_size, M_PROTO, 0)) == NULL)
		return;

	mrp = mac_protect_get(dsp->ds_mh);
	bzero(mp->b_rptr, mp_size);
	dlip = (dl_notify_ind_t *)mp->b_rptr;
	dlip->dl_primitive = DL_NOTIFY_IND;
	dlip->dl_notification = DL_NOTE_ALLOWED_IPS;
	dlip->dl_data = 0;
	dlip->dl_addr_offset = sizeof (dl_notify_ind_t);
	dlip->dl_addr_length = sizeof (mac_protect_t);
	bcopy(mrp, mp->b_rptr + sizeof (dl_notify_ind_t),
	    sizeof (mac_protect_t));

	qreply(dsp->ds_wq, mp);
}

/*
 * MAC notification callback.
 */
void
str_notify(void *arg, mac_notify_type_t type)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	queue_t *q = dsp->ds_wq;
	mac_handle_t mh = dsp->ds_mh;
	mac_client_handle_t mch = dsp->ds_mch;
	uint8_t addr[MAXMACADDRLEN];

	switch (type) {
	case MAC_NOTE_TX:
		qenable(q);
		break;

	case MAC_NOTE_DEVPROMISC:
		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		if (mac_promisc_get(mh))
			str_notify_promisc_on_phys(dsp);
		else
			str_notify_promisc_off_phys(dsp);
		break;

	case MAC_NOTE_UNICST:
		/*
		 * This notification is sent whenever the MAC unicast
		 * address changes.
		 */
		mac_unicast_primary_get(mh, addr);

		/*
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_phys_addr(dsp, DL_CURR_PHYS_ADDR, addr);
		break;

	case MAC_NOTE_DEST:
		/*
		 * Only send up DL_NOTE_DEST_ADDR if the link has a
		 * destination address.
		 */
		if (mac_dst_get(dsp->ds_mh, addr))
			str_notify_phys_addr(dsp, DL_CURR_DEST_ADDR, addr);
		break;

	case MAC_NOTE_LOWLINK:
	case MAC_NOTE_LINK:
		/*
		 * LOWLINK refers to the actual link status. For links that
		 * are not part of a bridge instance, LOWLINK and LINK state
		 * are the same. But for a link that is part of a bridge
		 * instance, LINK state refers to the aggregate link status:
		 * "up" when at least one link that is part of the bridge is
		 * up, and "down" when all links that are part of the bridge
		 * are down.
		 *
		 * Clients can request to be notified of the LOWLINK state
		 * using the DLIOCLOWLINK ioctl. Clients such as the bridge
		 * daemon request lowlink state changes, while upper layer
		 * clients receive notifications of the aggregate link state
		 * changes, which is the default when requesting LINK UP/DOWN
		 * state notifications.
		 */

		/*
		 * Check that the notification type matches the one that we
		 * want. If we want lower-level link notifications, and this
		 * is upper, or if we want upper and this is lower, then
		 * ignore.
		 */
		if ((type == MAC_NOTE_LOWLINK) != dsp->ds_lowlink)
			break;
		/*
		 * This notification is sent every time the MAC driver
		 * updates the link state.
		 */
		switch (mac_client_stat_get(mch, dsp->ds_lowlink ?
		    MAC_STAT_LOWLINK_STATE : MAC_STAT_LINK_STATE)) {
		case LINK_STATE_UP: {
			uint64_t speed;
			/*
			 * The link is up so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_up(dsp);

			speed = mac_stat_get(mh, MAC_STAT_IFSPEED);
			str_notify_speed(dsp, (uint32_t)(speed / 1000ull));
			break;
		}
		case LINK_STATE_DOWN:
			/*
			 * The link is down so send the appropriate
			 * DL_NOTIFY_IND.
			 */
			str_notify_link_down(dsp);
			break;

		default:
			break;
		}
		break;

	case MAC_NOTE_CAPAB_CHG:
		/*
		 * This notification is sent whenever the MAC resources
		 * change or capabilities change.
		 * We need to renegotiate the capabilities.
		 * Send the appropriate DL_NOTIFY_IND.
		 */
		str_notify_capab_reneg(dsp);
		break;

	case MAC_NOTE_SDU_SIZE: {
		uint_t max_sdu;
		uint_t multicast_sdu;
		mac_sdu_get2(dsp->ds_mh, NULL, &max_sdu, &multicast_sdu);
		str_notify_sdu_size(dsp, max_sdu, multicast_sdu);
		break;
	}

	case MAC_NOTE_FASTPATH_FLUSH:
		str_notify_fastpath_flush(dsp);
		break;

	/* Unused notifications */
	case MAC_NOTE_MARGIN:
		break;

	case MAC_NOTE_ALLOWED_IPS:
		str_notify_allowed_ips(dsp);
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}
}

/*
 * This function is called via a taskq mechanism to process all control
 * messages on a per-'dsp' endpoint basis.
 */
static void
dld_wput_nondata_task(void *arg)
{
	dld_str_t *dsp = arg;
	mblk_t *mp;

	mutex_enter(&dsp->ds_lock);
	while (dsp->ds_pending_head != NULL) {
		mp = dsp->ds_pending_head;
		dsp->ds_pending_head = mp->b_next;
		mp->b_next = NULL;
		if (dsp->ds_pending_head == NULL)
			dsp->ds_pending_tail = NULL;
		mutex_exit(&dsp->ds_lock);

		switch (DB_TYPE(mp)) {
		case M_PROTO:
		case M_PCPROTO:
			dld_proto(dsp, mp);
			break;
		case M_IOCTL:
			dld_ioc(dsp, mp);
			break;
		default:
			ASSERT(0);
		}

		mutex_enter(&dsp->ds_lock);
	}
	ASSERT(dsp->ds_pending_tail == NULL);
	dsp->ds_dlpi_pending = 0;
	cv_broadcast(&dsp->ds_dlpi_pending_cv);
	mutex_exit(&dsp->ds_lock);
}

/*
 * Kernel thread to handle taskq dispatch failures in dld_wput_nondata().
 * This thread is started at boot time.
 */
static void
dld_taskq_dispatch(void)
{
	callb_cpr_t cprinfo;
	dld_str_t *dsp;

	CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr,
	    "dld_taskq_dispatch");
	mutex_enter(&dld_taskq_lock);

	while (!dld_taskq_quit) {
		dsp = list_head(&dld_taskq_list);
		while (dsp != NULL) {
			list_remove(&dld_taskq_list, dsp);
			mutex_exit(&dld_taskq_lock);
			VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task,
			    dsp, TQ_SLEEP) != TASKQID_INVALID);
			mutex_enter(&dld_taskq_lock);
			dsp = list_head(&dld_taskq_list);
		}

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&dld_taskq_cv, &dld_taskq_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock);
	}

	dld_taskq_done = B_TRUE;
	cv_signal(&dld_taskq_cv);
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

/*
 * All control operations are serialized on the 'dsp' and are also funneled
 * through a taskq mechanism to ensure that subsequent processing has kernel
 * context and can safely use cv_wait.
 *
 * Mechanisms to handle taskq dispatch failures
 *
 * The only way to be sure that taskq dispatch does not fail is to either
 * specify TQ_SLEEP, or to use a static taskq, prepopulate it with some
 * number of entries, and make sure that the number of outstanding requests
 * is less than that number. We can't use TQ_SLEEP since we don't know the
 * context. Nor can we bound the total number of 'dsp' endpoints. So we are
 * unable to use either of the above schemes, and are forced to deal with
 * taskq dispatch failures. Note that even a dynamic taskq could fail in
 * dispatch if TQ_NOSLEEP is specified, since this flag is translated
 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq
 * framework.
 *
 * We maintain a queue of 'dsp's that encountered taskq dispatch failure.
 * We also have a single global thread to retry the taskq dispatch. This
 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but
 * uses TQ_SLEEP to ensure eventual success of the dispatch operation.
 */
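
/*
 * Summarizing the queueing in dld_wput_nondata() below (no new behaviour,
 * just the three cases spelled out):
 *
 *	1. ds_pending_head != NULL: the taskq task is (or will be) running;
 *	   simply append mp to the pending list and return.
 *	2. the list is empty but ds_dlpi_pending is set: the task is still
 *	   draining its last message; enqueue mp as the new head and return,
 *	   the task's loop will pick it up.
 *	3. otherwise set ds_dlpi_pending, enqueue mp and dispatch the task
 *	   with TQ_NOSLEEP; on dispatch failure hand the dsp to the
 *	   dld_taskq_dispatch() thread above, which retries with TQ_SLEEP.
 */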
static void
dld_wput_nondata(dld_str_t *dsp, mblk_t *mp)
{
	ASSERT(mp->b_next == NULL);
	mutex_enter(&dsp->ds_lock);
	if (dsp->ds_pending_head != NULL) {
		ASSERT(dsp->ds_dlpi_pending);
		dsp->ds_pending_tail->b_next = mp;
		dsp->ds_pending_tail = mp;
		mutex_exit(&dsp->ds_lock);
		return;
	}
	ASSERT(dsp->ds_pending_tail == NULL);
	dsp->ds_pending_head = dsp->ds_pending_tail = mp;
	/*
	 * At this point if ds_dlpi_pending is set, it implies that the taskq
	 * thread is still active and is processing the last message, though
	 * the pending queue has been emptied.
	 */
	if (dsp->ds_dlpi_pending) {
		mutex_exit(&dsp->ds_lock);
		return;
	}

	dsp->ds_dlpi_pending = 1;
	mutex_exit(&dsp->ds_lock);

	if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp,
	    TQ_NOSLEEP) != TASKQID_INVALID)
		return;

	mutex_enter(&dld_taskq_lock);
	list_insert_tail(&dld_taskq_list, dsp);
	cv_signal(&dld_taskq_cv);
	mutex_exit(&dld_taskq_lock);
}

/*
 * Process an M_IOCTL message.
 */
static void
dld_ioc(dld_str_t *dsp, mblk_t *mp)
{
	uint_t cmd;

	cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd;
	ASSERT(dsp->ds_type == DLD_DLPI);

	switch (cmd) {
	case DLIOCNATIVE:
		ioc_native(dsp, mp);
		break;
	case DLIOCMARGININFO:
		ioc_margin(dsp, mp);
		break;
	case DLIOCRAW:
		ioc_raw(dsp, mp);
		break;
	case DLIOCHDRINFO:
		ioc_fast(dsp, mp);
		break;
	case DLIOCLOWLINK:
		ioc_lowlink(dsp, mp);
		break;
	default:
		ioc(dsp, mp);
	}
}

/*
 * DLIOCNATIVE
 */
static void
ioc_native(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;
	const mac_info_t *mip = dsp->ds_mip;

	/*
	 * Native mode can only be enabled if it is currently disabled and
	 * the native media type differs from the emulated one.
	 */
	if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia)
		dsp->ds_native = B_TRUE;

	if (dsp->ds_native)
		miocack(q, mp, 0, mip->mi_nativemedia);
	else
		miocnak(q, mp, 0, ENOTSUP);
}

/*
 * DLIOCMARGININFO
 */
static void
ioc_margin(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;
	uint32_t margin;
	int err;

	if (dsp->ds_dlstate == DL_UNATTACHED) {
		err = EINVAL;
		goto failed;
	}
	if ((err = miocpullup(mp, sizeof (uint32_t))) != 0)
		goto failed;

	mac_margin_get(dsp->ds_mh, &margin);
	*((uint32_t *)mp->b_cont->b_rptr) = margin;
	miocack(q, mp, sizeof (uint32_t), 0);
	return;

failed:
	miocnak(q, mp, 0, err);
}

/*
 * DLIOCRAW
 */
static void
ioc_raw(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;
	mac_perim_handle_t mph;

	if (dsp->ds_mh == NULL) {
		dsp->ds_mode = DLD_RAW;
		miocack(q, mp, 0, 0);
		return;
	}

	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	if (dsp->ds_polling || dsp->ds_direct) {
		mac_perim_exit(mph);
		miocnak(q, mp, 0, EPROTO);
		return;
	}

	if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) {
		/*
		 * Set the receive callback.
		 */
		dls_rx_set(dsp, dld_str_rx_raw, dsp);
	}

	/*
	 * Note that raw mode is enabled.
	 */
	dsp->ds_mode = DLD_RAW;
	mac_perim_exit(mph);

	miocack(q, mp, 0, 0);
}

/*
 * DLIOCHDRINFO
 */
static void
ioc_fast(dld_str_t *dsp, mblk_t *mp)
{
	dl_unitdata_req_t *dlp;
	off_t off;
	size_t len;
	const uint8_t *addr;
	uint16_t sap;
	mblk_t *nmp;
	mblk_t *hmp;
	uint_t addr_length;
	queue_t *q = dsp->ds_wq;
	int err;
	mac_perim_handle_t mph;

	if (dld_opt & DLD_OPT_NO_FASTPATH) {
		err = ENOTSUP;
		goto failed;
	}

	/*
	 * DLIOCHDRINFO should only come from IP. The one initiated from
	 * user-land should not be allowed.
	 */
	if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) {
		err = EINVAL;
		goto failed;
	}

	nmp = mp->b_cont;
	if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) ||
	    (dlp = (dl_unitdata_req_t *)nmp->b_rptr,
	    dlp->dl_primitive != DL_UNITDATA_REQ)) {
		err = EINVAL;
		goto failed;
	}

	off = dlp->dl_dest_addr_offset;
	len = dlp->dl_dest_addr_length;

	if (!MBLKIN(nmp, off, len)) {
		err = EINVAL;
		goto failed;
	}

	if (dsp->ds_dlstate != DL_IDLE) {
		err = ENOTSUP;
		goto failed;
	}

	addr_length = dsp->ds_mip->mi_addr_length;
	if (len != addr_length + sizeof (uint16_t)) {
		err = EINVAL;
		goto failed;
	}

	addr = nmp->b_rptr + off;
	sap = *(uint16_t *)(nmp->b_rptr + off + addr_length);

	if ((hmp = dls_header(dsp, addr, sap, 0, NULL)) == NULL) {
		err = ENOMEM;
		goto failed;
	}

	/*
	 * This ioctl might happen concurrently with a direct call to dld_capab
	 * that tries to enable direct and/or poll capabilities. Since the
	 * stack does not serialize them, we do so here to avoid mixing
	 * the callbacks.
	 */
	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	if (dsp->ds_mode != DLD_FASTPATH) {
		/*
		 * Set the receive callback (unless polling is enabled).
		 */
		if (!dsp->ds_polling && !dsp->ds_direct)
			dls_rx_set(dsp, dld_str_rx_fastpath, dsp);

		/*
		 * Note that fast-path mode is enabled.
		 */
		dsp->ds_mode = DLD_FASTPATH;
	}
	mac_perim_exit(mph);

	freemsg(nmp->b_cont);
	nmp->b_cont = hmp;

	miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0);
	return;
failed:
	miocnak(q, mp, 0, err);
}

/*
 * DLIOCLOWLINK: request actual link state changes. When the
 * link is part of a bridge instance the client receives actual
 * link state changes and not the aggregate link status. Used by
 * the bridging daemon (bridged) for proper RSTP operation.
 */
static void
ioc_lowlink(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;
	int err;

	if ((err = miocpullup(mp, sizeof (int))) != 0) {
		miocnak(q, mp, 0, err);
	} else {
		/* LINTED: alignment */
		dsp->ds_lowlink = *(boolean_t *)mp->b_cont->b_rptr;
		miocack(q, mp, 0, 0);
	}
}

/*
 * Catch-all handler.
 */
static void
ioc(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;

	if (dsp->ds_dlstate == DL_UNATTACHED) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}
	mac_ioctl(dsp->ds_mh, q, mp);
}
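
/*
 * Usage sketch (an assumption for illustration; not part of this driver):
 * a raw-mode DLPI consumer typically opens the datalink node and pushes
 * DLIOCRAW through an I_STR ioctl before writing M_DATA frames that carry
 * the full MAC header. The link name below is hypothetical:
 *
 *	int fd = open("/dev/net/net0", O_RDWR);
 *	struct strioctl sioc = {
 *		.ic_cmd = DLIOCRAW, .ic_timout = -1, .ic_len = 0, .ic_dp = NULL
 *	};
 *	if (ioctl(fd, I_STR, &sioc) == -1)
 *		err(1, "DLIOCRAW");
 *	// Subsequent putmsg()/write() M_DATA payloads must begin with the
 *	// MAC header; dld_wput() will route them through str_mdata_raw_put().
 */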