1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Data-Link Driver 28 */ 29 30 #include <inet/common.h> 31 #include <sys/strsubr.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/vlan.h> 35 #include <sys/dld_impl.h> 36 #include <sys/cpuvar.h> 37 #include <sys/callb.h> 38 #include <sys/list.h> 39 #include <sys/mac_client.h> 40 #include <sys/mac_client_priv.h> 41 42 static int str_constructor(void *, void *, int); 43 static void str_destructor(void *, void *); 44 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t); 45 static void str_notify_promisc_on_phys(dld_str_t *); 46 static void str_notify_promisc_off_phys(dld_str_t *); 47 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 48 static void str_notify_link_up(dld_str_t *); 49 static void str_notify_link_down(dld_str_t *); 50 static void str_notify_capab_reneg(dld_str_t *); 51 static void str_notify_speed(dld_str_t *, uint32_t); 52 53 static void ioc_native(dld_str_t *, mblk_t *); 54 static void ioc_margin(dld_str_t *, mblk_t *); 55 static void 
ioc_raw(dld_str_t *, mblk_t *); 56 static void ioc_fast(dld_str_t *, mblk_t *); 57 static void ioc(dld_str_t *, mblk_t *); 58 static void dld_ioc(dld_str_t *, mblk_t *); 59 static void dld_wput_nondata(dld_str_t *, mblk_t *); 60 61 static void str_mdata_raw_put(dld_str_t *, mblk_t *); 62 static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t, 63 link_tagmode_t); 64 static mblk_t *i_dld_ether_header_strip_tag(mblk_t *, boolean_t); 65 66 static uint32_t str_count; 67 static kmem_cache_t *str_cachep; 68 static mod_hash_t *str_hashp; 69 70 #define STR_HASHSZ 64 71 #define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 72 73 #define dld_taskq system_taskq 74 75 static kmutex_t dld_taskq_lock; 76 static kcondvar_t dld_taskq_cv; 77 static list_t dld_taskq_list; /* List of dld_str_t */ 78 boolean_t dld_taskq_quit; 79 boolean_t dld_taskq_done; 80 81 static void dld_taskq_dispatch(void); 82 83 /* 84 * Some notes on entry points, flow-control, queueing. 85 * 86 * This driver exports the traditional STREAMS put entry point as well as 87 * the non-STREAMS fast-path transmit routine which is provided to IP via 88 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 89 * and data operations, while the fast-path routine deals only with M_DATA 90 * fast-path packets. Regardless of the entry point, all outbound packets 91 * will end up in DLD_TX(), where they will be delivered to the MAC layer. 92 * 93 * The transmit logic operates in the following way: All packets coming 94 * into DLD will be sent to the MAC layer through DLD_TX(). Flow-control 95 * happens when the MAC layer indicates the packets couldn't be 96 * transmitted due to 1) lack of resources (e.g. running out of 97 * descriptors), or 2) reaching the allowed bandwidth limit for this 98 * particular flow. The indication comes in the form of a Tx cookie that 99 * identifies the blocked ring. 
 * In such a case, DLD will place a
 * dummy message on its write-side STREAMS queue so that the queue is
 * marked as "full". Any subsequent packets arriving at the driver will
 * still be sent to the MAC layer, where they either get queued in the Tx
 * SRS or are discarded if the queue limit is exceeded. The write-side
 * STREAMS queue gets enabled when the MAC layer notifies DLD through
 * MAC_NOTE_TX. When the write service procedure runs, it will remove the
 * dummy message from the write-side STREAMS queue; in effect this will
 * trigger backenabling. The sizes of q_hiwat and q_lowat are set to 1 and 0,
 * respectively, due to the above reasons.
 *
 * All non-data operations, both DLPI and ioctls, are single threaded on a
 * per dld_str_t endpoint. This is done using a taskq so that the control
 * operation has kernel context and can cv_wait for resources. In addition,
 * all set type operations that involve mac level state modification are
 * serialized on a per mac end point using the perimeter mechanism provided
 * by the mac layer. This serializes all mac clients trying to modify a
 * single mac end point over the entire sequence of mac calls made by that
 * client as an atomic unit. The mac framework locking is described in mac.c.
 * A critical element is that DLD/DLS does not hold any locks across the mac
 * perimeter.
 *
 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
 * match dev_t. If a stream is found and it is attached, its dev_info_t *
 * is returned. If the mac handle is non-null, it can be safely accessed
 * below. The mac handle won't be freed until the mac_unregister which
 * won't happen until the driver detaches. The DDI framework ensures that
 * the detach won't happen while a getinfo is in progress.
127 */ 128 typedef struct i_dld_str_state_s { 129 major_t ds_major; 130 minor_t ds_minor; 131 dev_info_t *ds_dip; 132 } i_dld_str_state_t; 133 134 /* ARGSUSED */ 135 static uint_t 136 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 137 { 138 i_dld_str_state_t *statep = arg; 139 dld_str_t *dsp = (dld_str_t *)val; 140 mac_handle_t mh; 141 142 if (statep->ds_major != dsp->ds_major) 143 return (MH_WALK_CONTINUE); 144 145 ASSERT(statep->ds_minor != 0); 146 mh = dsp->ds_mh; 147 148 if (statep->ds_minor == dsp->ds_minor) { 149 /* 150 * Clone: a clone minor is unique. we can terminate the 151 * walk if we find a matching stream -- even if we fail 152 * to obtain the devinfo. 153 */ 154 if (mh != NULL) 155 statep->ds_dip = mac_devinfo_get(mh); 156 return (MH_WALK_TERMINATE); 157 } 158 return (MH_WALK_CONTINUE); 159 } 160 161 static dev_info_t * 162 dld_finddevinfo(dev_t dev) 163 { 164 dev_info_t *dip; 165 i_dld_str_state_t state; 166 167 if (getminor(dev) == 0) 168 return (NULL); 169 170 /* 171 * See if it's a minor node of a link 172 */ 173 if ((dip = dls_link_devinfo(dev)) != NULL) 174 return (dip); 175 176 state.ds_minor = getminor(dev); 177 state.ds_major = getmajor(dev); 178 state.ds_dip = NULL; 179 180 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 181 return (state.ds_dip); 182 } 183 184 /* 185 * devo_getinfo: getinfo(9e) 186 */ 187 /*ARGSUSED*/ 188 int 189 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 190 { 191 dev_info_t *devinfo; 192 minor_t minor = getminor((dev_t)arg); 193 int rc = DDI_FAILURE; 194 195 switch (cmd) { 196 case DDI_INFO_DEVT2DEVINFO: 197 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 198 *(dev_info_t **)resp = devinfo; 199 rc = DDI_SUCCESS; 200 } 201 break; 202 case DDI_INFO_DEVT2INSTANCE: 203 if (minor > 0 && minor <= DLS_MAX_MINOR) { 204 *resp = (void *)(uintptr_t)DLS_MINOR2INST(minor); 205 rc = DDI_SUCCESS; 206 } else if (minor > DLS_MAX_MINOR && 207 (devinfo = 
dld_finddevinfo((dev_t)arg)) != NULL) { 208 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); 209 rc = DDI_SUCCESS; 210 } 211 break; 212 } 213 return (rc); 214 } 215 216 void * 217 dld_str_private(queue_t *q) 218 { 219 return (((dld_str_t *)(q->q_ptr))->ds_private); 220 } 221 222 int 223 dld_str_open(queue_t *rq, dev_t *devp, void *private) 224 { 225 dld_str_t *dsp; 226 major_t major; 227 minor_t minor; 228 int err; 229 230 major = getmajor(*devp); 231 minor = getminor(*devp); 232 233 /* 234 * Create a new dld_str_t for the stream. This will grab a new minor 235 * number that will be handed back in the cloned dev_t. Creation may 236 * fail if we can't allocate the dummy mblk used for flow-control. 237 */ 238 dsp = dld_str_create(rq, DLD_DLPI, major, 239 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 240 if (dsp == NULL) 241 return (ENOSR); 242 243 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 244 dsp->ds_private = private; 245 if (minor != 0) { 246 /* 247 * Style 1 open 248 */ 249 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 250 goto failed; 251 252 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 253 } else { 254 (void) qassociate(rq, -1); 255 } 256 257 /* 258 * Enable the queue srv(9e) routine. 259 */ 260 qprocson(rq); 261 262 /* 263 * Construct a cloned dev_t to hand back. 264 */ 265 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 266 return (0); 267 268 failed: 269 dld_str_destroy(dsp); 270 return (err); 271 } 272 273 int 274 dld_str_close(queue_t *rq) 275 { 276 dld_str_t *dsp = rq->q_ptr; 277 278 /* 279 * All modules on top have been popped off. So there can't be any 280 * threads from the top. 281 */ 282 ASSERT(dsp->ds_datathr_cnt == 0); 283 284 /* 285 * Wait until pending DLPI requests are processed. 286 */ 287 mutex_enter(&dsp->ds_lock); 288 while (dsp->ds_dlpi_pending) 289 cv_wait(&dsp->ds_dlpi_pending_cv, &dsp->ds_lock); 290 mutex_exit(&dsp->ds_lock); 291 292 293 /* 294 * This stream was open to a provider node. 
Check to see 295 * if it has been cleanly shut down. 296 */ 297 if (dsp->ds_dlstate != DL_UNATTACHED) { 298 /* 299 * The stream is either open to a style 1 provider or 300 * this is not clean shutdown. Detach from the PPA. 301 * (This is still ok even in the style 1 case). 302 */ 303 dld_str_detach(dsp); 304 } 305 306 dld_str_destroy(dsp); 307 return (0); 308 } 309 310 /* 311 * qi_qopen: open(9e) 312 */ 313 /*ARGSUSED*/ 314 int 315 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 316 { 317 if (sflag == MODOPEN) 318 return (ENOTSUP); 319 320 /* 321 * This is a cloning driver and therefore each queue should only 322 * ever get opened once. 323 */ 324 if (rq->q_ptr != NULL) 325 return (EBUSY); 326 327 return (dld_str_open(rq, devp, NULL)); 328 } 329 330 /* 331 * qi_qclose: close(9e) 332 */ 333 int 334 dld_close(queue_t *rq) 335 { 336 /* 337 * Disable the queue srv(9e) routine. 338 */ 339 qprocsoff(rq); 340 341 return (dld_str_close(rq)); 342 } 343 344 /* 345 * qi_qputp: put(9e) 346 */ 347 void 348 dld_wput(queue_t *wq, mblk_t *mp) 349 { 350 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 351 dld_str_mode_t mode; 352 353 switch (DB_TYPE(mp)) { 354 case M_DATA: 355 mutex_enter(&dsp->ds_lock); 356 mode = dsp->ds_mode; 357 if ((dsp->ds_dlstate != DL_IDLE) || 358 (mode != DLD_FASTPATH && mode != DLD_RAW)) { 359 mutex_exit(&dsp->ds_lock); 360 freemsg(mp); 361 break; 362 } 363 364 DLD_DATATHR_INC(dsp); 365 mutex_exit(&dsp->ds_lock); 366 if (mode == DLD_FASTPATH) { 367 if (dsp->ds_mip->mi_media == DL_ETHER && 368 (MBLKL(mp) < sizeof (struct ether_header))) { 369 freemsg(mp); 370 } else { 371 (void) str_mdata_fastpath_put(dsp, mp, 0, 0); 372 } 373 } else { 374 str_mdata_raw_put(dsp, mp); 375 } 376 DLD_DATATHR_DCR(dsp); 377 break; 378 case M_PROTO: 379 case M_PCPROTO: { 380 t_uscalar_t prim; 381 382 if (MBLKL(mp) < sizeof (t_uscalar_t)) 383 break; 384 385 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; 386 387 if (prim == DL_UNITDATA_REQ) { 388 
proto_unitdata_req(dsp, mp); 389 } else { 390 dld_wput_nondata(dsp, mp); 391 } 392 break; 393 } 394 395 case M_IOCTL: 396 dld_wput_nondata(dsp, mp); 397 break; 398 399 case M_FLUSH: 400 if (*mp->b_rptr & FLUSHW) { 401 DLD_CLRQFULL(dsp); 402 *mp->b_rptr &= ~FLUSHW; 403 } 404 405 if (*mp->b_rptr & FLUSHR) { 406 qreply(wq, mp); 407 } else { 408 freemsg(mp); 409 } 410 break; 411 412 default: 413 freemsg(mp); 414 break; 415 } 416 } 417 418 /* 419 * qi_srvp: srv(9e) 420 */ 421 void 422 dld_wsrv(queue_t *wq) 423 { 424 dld_str_t *dsp = wq->q_ptr; 425 426 DLD_CLRQFULL(dsp); 427 } 428 429 void 430 dld_init_ops(struct dev_ops *ops, const char *name) 431 { 432 struct streamtab *stream; 433 struct qinit *rq, *wq; 434 struct module_info *modinfo; 435 436 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 437 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 438 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 439 modinfo->mi_minpsz = 0; 440 modinfo->mi_maxpsz = 64*1024; 441 modinfo->mi_hiwat = 1; 442 modinfo->mi_lowat = 0; 443 444 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 445 rq->qi_qopen = dld_open; 446 rq->qi_qclose = dld_close; 447 rq->qi_minfo = modinfo; 448 449 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 450 wq->qi_putp = (pfi_t)dld_wput; 451 wq->qi_srvp = (pfi_t)dld_wsrv; 452 wq->qi_minfo = modinfo; 453 454 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 455 stream->st_rdinit = rq; 456 stream->st_wrinit = wq; 457 ops->devo_cb_ops->cb_str = stream; 458 459 if (ops->devo_getinfo == NULL) 460 ops->devo_getinfo = &dld_getinfo; 461 } 462 463 void 464 dld_fini_ops(struct dev_ops *ops) 465 { 466 struct streamtab *stream; 467 struct qinit *rq, *wq; 468 struct module_info *modinfo; 469 470 stream = ops->devo_cb_ops->cb_str; 471 rq = stream->st_rdinit; 472 wq = stream->st_wrinit; 473 modinfo = rq->qi_minfo; 474 ASSERT(wq->qi_minfo == modinfo); 475 476 kmem_free(stream, sizeof (struct streamtab)); 477 kmem_free(wq, sizeof (struct 
qinit)); 478 kmem_free(rq, sizeof (struct qinit)); 479 kmem_free(modinfo->mi_idname, FMNAMESZ); 480 kmem_free(modinfo, sizeof (struct module_info)); 481 } 482 483 /* 484 * Initialize this module's data structures. 485 */ 486 void 487 dld_str_init(void) 488 { 489 /* 490 * Create dld_str_t object cache. 491 */ 492 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 493 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 494 ASSERT(str_cachep != NULL); 495 496 /* 497 * Create a hash table for maintaining dld_str_t's. 498 * The ds_minor field (the clone minor number) of a dld_str_t 499 * is used as a key for this hash table because this number is 500 * globally unique (allocated from "dls_minor_arena"). 501 */ 502 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 503 mod_hash_null_valdtor); 504 505 mutex_init(&dld_taskq_lock, NULL, MUTEX_DRIVER, NULL); 506 cv_init(&dld_taskq_cv, NULL, CV_DRIVER, NULL); 507 508 dld_taskq_quit = B_FALSE; 509 dld_taskq_done = B_FALSE; 510 list_create(&dld_taskq_list, sizeof (dld_str_t), 511 offsetof(dld_str_t, ds_tqlist)); 512 (void) thread_create(NULL, 0, dld_taskq_dispatch, NULL, 0, 513 &p0, TS_RUN, minclsyspri); 514 } 515 516 /* 517 * Tear down this module's data structures. 518 */ 519 int 520 dld_str_fini(void) 521 { 522 /* 523 * Make sure that there are no objects in use. 524 */ 525 if (str_count != 0) 526 return (EBUSY); 527 528 /* 529 * Ask the dld_taskq thread to quit and wait for it to be done 530 */ 531 mutex_enter(&dld_taskq_lock); 532 dld_taskq_quit = B_TRUE; 533 cv_signal(&dld_taskq_cv); 534 while (!dld_taskq_done) 535 cv_wait(&dld_taskq_cv, &dld_taskq_lock); 536 mutex_exit(&dld_taskq_lock); 537 list_destroy(&dld_taskq_list); 538 /* 539 * Destroy object cache. 540 */ 541 kmem_cache_destroy(str_cachep); 542 mod_hash_destroy_idhash(str_hashp); 543 return (0); 544 } 545 546 /* 547 * Create a new dld_str_t object. 
 *
 * The object is taken from str_cachep, bound to the read queue rq and its
 * write-side partner, and inserted into str_hashp keyed by ds_minor.
 * Returns NULL if the dummy flow-control mblk cannot be allocated.
 */
dld_str_t *
dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style)
{
	dld_str_t	*dsp;
	int		err;

	/*
	 * Allocate an object from the cache.
	 */
	atomic_add_32(&str_count, 1);
	dsp = kmem_cache_alloc(str_cachep, KM_SLEEP);

	/*
	 * Allocate the dummy mblk for flow-control.
	 */
	dsp->ds_tx_flow_mp = allocb(1, BPRI_HI);
	if (dsp->ds_tx_flow_mp == NULL) {
		kmem_cache_free(str_cachep, dsp);
		atomic_add_32(&str_count, -1);
		return (NULL);
	}
	dsp->ds_type = type;
	dsp->ds_major = major;
	dsp->ds_style = style;

	/*
	 * Initialize the queue pointers.
	 */
	ASSERT(RD(rq) == rq);
	dsp->ds_rq = rq;
	dsp->ds_wq = WR(rq);
	rq->q_ptr = WR(rq)->q_ptr = (void *)dsp;

	/*
	 * We want explicit control over our write-side STREAMS queue
	 * where the dummy mblk gets added/removed for flow-control.
	 */
	noenable(WR(rq));

	err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor),
	    (mod_hash_val_t)dsp);
	ASSERT(err == 0);
	return (dsp);
}

/*
 * Destroy a dld_str_t object.
 *
 * The ASSERTs below require the stream to be fully detached and idle. The
 * object is removed from str_hashp and returned to the cache.
 */
void
dld_str_destroy(dld_str_t *dsp)
{
	queue_t		*rq;
	queue_t		*wq;
	mod_hash_val_t	val;

	/*
	 * Clear the queue pointers.
	 */
	rq = dsp->ds_rq;
	wq = dsp->ds_wq;
	ASSERT(wq == WR(rq));
	rq->q_ptr = wq->q_ptr = NULL;
	dsp->ds_rq = dsp->ds_wq = NULL;

	ASSERT(dsp->ds_dlstate == DL_UNATTACHED);
	ASSERT(dsp->ds_sap == 0);
	ASSERT(dsp->ds_mh == NULL);
	ASSERT(dsp->ds_mch == NULL);
	ASSERT(dsp->ds_promisc == 0);
	ASSERT(dsp->ds_mph == NULL);
	ASSERT(dsp->ds_mip == NULL);
	ASSERT(dsp->ds_mnh == NULL);

	ASSERT(dsp->ds_polling == B_FALSE);
	ASSERT(dsp->ds_direct == B_FALSE);
	ASSERT(dsp->ds_lso == B_FALSE);
	ASSERT(dsp->ds_lso_max == 0);
	ASSERT(dsp->ds_passivestate != DLD_ACTIVE);

	/*
	 * Reinitialize all the flags.
	 */
	dsp->ds_notifications = 0;
	dsp->ds_passivestate = DLD_UNINITIALIZED;
	dsp->ds_mode = DLD_UNITDATA;
	dsp->ds_native = B_FALSE;

	ASSERT(dsp->ds_datathr_cnt == 0);
	ASSERT(dsp->ds_pending_head == NULL);
	ASSERT(dsp->ds_pending_tail == NULL);
	ASSERT(!dsp->ds_dlpi_pending);

	ASSERT(dsp->ds_dlp == NULL);
	ASSERT(dsp->ds_dmap == NULL);
	ASSERT(dsp->ds_rx == NULL);
	ASSERT(dsp->ds_rx_arg == NULL);
	ASSERT(dsp->ds_next == NULL);
	ASSERT(dsp->ds_head == NULL);

	/*
	 * Free the dummy mblk if exists.
	 */
	if (dsp->ds_tx_flow_mp != NULL) {
		freeb(dsp->ds_tx_flow_mp);
		dsp->ds_tx_flow_mp = NULL;
	}

	(void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val);
	ASSERT(dsp == (dld_str_t *)val);

	/*
	 * Free the object back to the cache.
	 */
	kmem_cache_free(str_cachep, dsp);
	atomic_add_32(&str_count, -1);
}

/*
 * kmem_cache constructor function: see kmem_cache_create(9f).
 *
 * Zeroes the buffer, obtains a minor number for it and initializes the
 * lock and condition variables. Returns 0 on success, or -1 if no minor
 * number could be obtained.
 */
/*ARGSUSED*/
static int
str_constructor(void *buf, void *cdrarg, int kmflags)
{
	dld_str_t	*dsp = buf;

	bzero(buf, sizeof (dld_str_t));

	/*
	 * Allocate a new minor number.
	 */
	if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0)
		return (-1);

	/*
	 * Initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;

	mutex_init(&dsp->ds_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&dsp->ds_datathr_cv, NULL, CV_DRIVER, NULL);
	cv_init(&dsp->ds_dlpi_pending_cv, NULL, CV_DRIVER, NULL);

	return (0);
}

/*
 * kmem_cache destructor function.
 *
 * Undoes str_constructor(): releases the minor number and destroys the
 * lock and condition variables.
 */
/*ARGSUSED*/
static void
str_destructor(void *buf, void *cdrarg)
{
	dld_str_t	*dsp = buf;

	/*
	 * Release the minor number.
	 */
	mac_minor_rele(dsp->ds_minor);

	ASSERT(dsp->ds_tx_flow_mp == NULL);

	mutex_destroy(&dsp->ds_lock);
	cv_destroy(&dsp->ds_datathr_cv);
	cv_destroy(&dsp->ds_dlpi_pending_cv);
}

/*
 * Update the priority bits and VID (may need to insert tag if mp points
 * to an untagged packet).
 * If vid is VLAN_ID_NONE, use the VID encoded in the packet.
 *
 * Returns the head of the resulting message, which may differ from mp, or
 * NULL on failure. On failure mp is not freed here; the callers in this
 * file free it themselves.
 */
static mblk_t *
i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid,
    link_tagmode_t tagmode)
{
	mblk_t *hmp;
	struct ether_vlan_header *evhp;
	struct ether_header *ehp;
	uint16_t old_tci = 0;
	size_t len;

	ASSERT(pri != 0 || vid != VLAN_ID_NONE);

	evhp = (struct ether_vlan_header *)mp->b_rptr;
	if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) {
		/*
		 * Tagged packet, update the priority bits.
		 */
		len = sizeof (struct ether_vlan_header);

		if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) {
			/*
			 * In case some drivers only check the db_ref
			 * count of the first mblk, we pullup the
			 * message into a single mblk.
			 */
			hmp = msgpullup(mp, -1);
			if ((hmp == NULL) || (MBLKL(hmp) < len)) {
				freemsg(hmp);
				return (NULL);
			} else {
				freemsg(mp);
				mp = hmp;
			}
		}

		evhp = (struct ether_vlan_header *)mp->b_rptr;
		old_tci = ntohs(evhp->ether_tci);
	} else {
		/*
		 * Untagged packet.  Two factors will cause us to insert a
		 * VLAN header:
		 * - This is a VLAN link (vid is specified)
		 * - The link supports user priority tagging and the priority
		 *   is non-zero.
		 */
		if (vid == VLAN_ID_NONE && tagmode == LINK_TAGMODE_VLANONLY)
			return (mp);

		hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED);
		if (hmp == NULL)
			return (NULL);

		evhp = (struct ether_vlan_header *)hmp->b_rptr;
		ehp = (struct ether_header *)mp->b_rptr;

		/*
		 * Copy the MAC addresses and typelen
		 */
		bcopy(ehp, evhp, (ETHERADDRL * 2));
		evhp->ether_type = ehp->ether_type;
		evhp->ether_tpid = htons(ETHERTYPE_VLAN);

		hmp->b_wptr += sizeof (struct ether_vlan_header);
		mp->b_rptr += sizeof (struct ether_header);

		/*
		 * Free the original message if it's now empty. Link the
		 * rest of the messages to the header message.
		 */
		if (MBLKL(mp) == 0) {
			hmp->b_cont = mp->b_cont;
			freeb(mp);
		} else {
			hmp->b_cont = mp;
		}
		mp = hmp;
	}

	/*
	 * A zero pri or a VLAN_ID_NONE vid means "keep what the packet had";
	 * old_tci is 0 for a packet that was untagged on entry.
	 */
	if (pri == 0)
		pri = VLAN_PRI(old_tci);
	if (vid == VLAN_ID_NONE)
		vid = VLAN_ID(old_tci);
	evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid));
	return (mp);
}

/*
 * M_DATA put (IP fast-path mode)
 *
 * Returns the value of DLD_TX(); a non-NULL cookie also causes the queue to
 * be marked full via DLD_SETQFULL(). NULL is also returned when the packet
 * is discarded because the priority tag could not be applied.
 */
mac_tx_cookie_t
str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp, uintptr_t f_hint,
    uint16_t flag)
{
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *newmp;
	uint_t pri;
	mac_tx_cookie_t cookie;

	if (is_ethernet) {
		/*
		 * Update the priority bits to the assigned priority.
		 */
		pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp);

		if (pri != 0) {
			newmp = i_dld_ether_header_update_tag(mp, pri,
			    VLAN_ID_NONE, dsp->ds_dlp->dl_tagmode);
			if (newmp == NULL)
				goto discard;
			mp = newmp;
		}
	}

	if ((cookie = DLD_TX(dsp, mp, f_hint, flag)) != NULL) {
		DLD_SETQFULL(dsp);
	}
	return (cookie);

discard:
	/* TODO: bump kstat? */
	freemsg(mp);
	return (NULL);
}

/*
 * M_DATA put (DLIOCRAW mode)
 *
 * The packet is validated (fragment types, total size, VLAN tag rules) and
 * is freed without being transmitted if any check fails.
 */
static void
str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp)
{
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *bp, *newmp;
	size_t size;
	mac_header_info_t mhi;
	uint_t pri, vid, dvid;
	uint_t max_sdu;

	/*
	 * Certain MAC type plugins provide an illusion for raw DLPI
	 * consumers.  They pretend that the MAC layer is something that
	 * it's not for the benefit of observability tools.  For example,
	 * mac_wifi pretends that it's Ethernet for such consumers.
	 * Here, unless native mode is enabled, we call into the MAC layer so
	 * that this illusion can be maintained.  The plugin will optionally
	 * transform the MAC header here into something that can be passed
	 * down.  The header goes from raw mode to "cooked" mode.
	 */
	if (!dsp->ds_native) {
		if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL)
			goto discard;
		mp = newmp;
	}

	size = MBLKL(mp);

	/*
	 * Check the packet is not too big and that any remaining
	 * fragment list is composed entirely of M_DATA messages. (We
	 * know the first fragment was M_DATA otherwise we could not
	 * have got here).
	 */
	for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) {
		if (DB_TYPE(bp) != M_DATA)
			goto discard;
		size += MBLKL(bp);
	}

	if (dls_link_header_info(dsp->ds_dlp, mp, &mhi) != 0)
		goto discard;

	mac_sdu_get(dsp->ds_mh, NULL, &max_sdu);
	/*
	 * If LSO is enabled, check the size against lso_max. Otherwise,
	 * compare the packet size with max_sdu.
	 */
	max_sdu = dsp->ds_lso ? dsp->ds_lso_max : max_sdu;
	if (size > max_sdu + mhi.mhi_hdrsize)
		goto discard;

	if (is_ethernet) {
		dvid = mac_client_vid(dsp->ds_mch);

		/*
		 * Discard the packet if this is a VLAN stream but the VID in
		 * the packet is not correct.
		 */
		vid = VLAN_ID(mhi.mhi_tci);
		if ((dvid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE))
			goto discard;

		/*
		 * Discard the packet if this packet is a tagged packet
		 * but both pri and VID are 0.
		 */
		pri = VLAN_PRI(mhi.mhi_tci);
		if (mhi.mhi_istagged && (pri == 0) && (vid == VLAN_ID_NONE))
			goto discard;

		/*
		 * Update the priority bits to the per-stream priority if
		 * priority is not set in the packet. Update the VID for
		 * packets on a VLAN stream.
		 */
		pri = (pri == 0) ? dsp->ds_pri : 0;
		if ((pri != 0) || (dvid != VLAN_ID_NONE)) {
			if ((newmp = i_dld_ether_header_update_tag(mp, pri,
			    dvid, dsp->ds_dlp->dl_tagmode)) == NULL) {
				goto discard;
			}
			mp = newmp;
		}
	}

	if (DLD_TX(dsp, mp, 0, 0) != NULL) {
		/* Turn on flow-control for dld */
		DLD_SETQFULL(dsp);
	}
	return;

discard:
	/* TODO: bump kstat? */
	freemsg(mp);
}

/*
 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1).
 *
 * Returns 0 on success, in which case the stream is left in DL_UNBOUND, or
 * an errno value on failure, in which case everything acquired along the
 * way is released again.
 */
int
dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa)
{
	dev_t			dev;
	int			err;
	const char		*drvname;
	mac_perim_handle_t	mph = NULL;
	boolean_t		qassociated = B_FALSE;
	dls_link_t		*dlp = NULL;
	dls_dl_handle_t		ddp = NULL;

	if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL)
		return (EINVAL);

	if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA)
		return (ENOTSUP);

	/*
	 * /dev node access. This will still be supported for backward
	 * compatibility reason.
	 */
	if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) &&
	    (strcmp(drvname, "vnic") != 0)) {
		if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0)
			return (EINVAL);
		qassociated = B_TRUE;
	}

	dev = makedevice(dsp->ds_major, (minor_t)ppa + 1);
	if ((err = dls_devnet_hold_by_dev(dev, &ddp)) != 0)
		goto failed;

	if ((err = mac_perim_enter_by_macname(dls_devnet_mac(ddp), &mph)) != 0)
		goto failed;

	/*
	 * Open a channel.
	 */
	if ((err = dls_link_hold(dls_devnet_mac(ddp), &dlp)) != 0)
		goto failed;

	if ((err = dls_open(dlp, ddp, dsp)) != 0)
		goto failed;

	/*
	 * Set the default packet priority.
	 */
	dsp->ds_pri = 0;

	/*
	 * Add a notify function so that we get updates from the MAC.
	 */
	dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, dsp);
	dsp->ds_dlstate = DL_UNBOUND;
	mac_perim_exit(mph);
	return (0);

failed:
	/* Release only what was actually acquired before the failure. */
	if (dlp != NULL)
		dls_link_rele(dlp);
	if (mph != NULL)
		mac_perim_exit(mph);
	if (ddp != NULL)
		dls_devnet_rele(ddp);
	if (qassociated)
		(void) qassociate(dsp->ds_wq, -1);

	return (err);
}

/*
 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called
 * from close(2) for style 2.
 */
void
dld_str_detach(dld_str_t *dsp)
{
	mac_perim_handle_t	mph;
	int			err;

	ASSERT(dsp->ds_datathr_cnt == 0);

	mac_perim_enter_by_mh(dsp->ds_mh, &mph);
	/*
	 * Remove the notify function.
	 *
	 * Note that we cannot wait for the notification callback to be removed
	 * since it could cause the deadlock with str_notify() since they both
	 * need the mac perimeter. Continue if we cannot remove the
	 * notification callback right now and wait after we leave the
	 * perimeter.
	 */
	err = mac_notify_remove(dsp->ds_mnh, B_FALSE);
	dsp->ds_mnh = NULL;

	/*
	 * Disable the capabilities
	 */
	dld_capabilities_disable(dsp);

	/*
	 * Clear LSO flags.
	 */
	dsp->ds_lso = B_FALSE;
	dsp->ds_lso_max = 0;

	dls_close(dsp);
	mac_perim_exit(mph);

	/*
	 * Now we leave the mac perimeter. If mac_notify_remove() failed
	 * because the notification callback was in progress, wait for
	 * it to finish before we proceed.
	 */
	if (err != 0)
		mac_notify_remove_wait(dsp->ds_mh);

	/*
	 * An unreferenced tagged (non-persistent) vlan gets destroyed
	 * automatically in the call to dls_devnet_rele.
	 */
	dls_devnet_rele(dsp->ds_ddh);

	dsp->ds_sap = 0;
	dsp->ds_mh = NULL;
	dsp->ds_mch = NULL;
	dsp->ds_mip = NULL;

	if (dsp->ds_style == DL_STYLE2)
		(void) qassociate(dsp->ds_wq, -1);

	/*
	 * Re-initialize the DLPI state machine.
	 */
	dsp->ds_dlstate = DL_UNATTACHED;
}

/*
 * This function is only called for VLAN streams. In raw mode, we strip VLAN
 * tags before sending packets up to the DLS clients, with the exception of
 * special priority tagged packets, in that case, we set the VID to 0.
 * mp must be a VLAN tagged packet.
 *
 * Returns the message to pass on (a copy if the data block was shared), or
 * NULL if that copy could not be made, in which case mp is not freed here.
 */
static mblk_t *
i_dld_ether_header_strip_tag(mblk_t *mp, boolean_t keep_pri)
{
	mblk_t *newmp;
	struct ether_vlan_header *evhp;
	uint16_t tci, new_tci;

	ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header));
	if (DB_REF(mp) > 1) {
		newmp = copymsg(mp);
		if (newmp == NULL)
			return (NULL);
		freemsg(mp);
		mp = newmp;
	}
	evhp = (struct ether_vlan_header *)mp->b_rptr;

	tci = ntohs(evhp->ether_tci);
	if (VLAN_PRI(tci) == 0 || !keep_pri) {
		/*
		 * Priority is 0, strip the tag.
		 */
		ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL);
		mp->b_rptr += VLAN_TAGSZ;
	} else {
		/*
		 * Priority is not 0, update the VID to 0.
		 */
		new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE);
		evhp->ether_tci = htons(new_tci);
	}
	return (mp);
}

/*
 * Raw mode receive function.
 *
 * Walks the b_next chain starting at mp. Each packet is sent upstream with
 * its MAC header restored, or freed if it cannot be transformed or the
 * upstream queue cannot accept it.
 */
/*ARGSUSED*/
void
dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER);
	mblk_t *next, *newmp;

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the MAC header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
		mp->b_rptr -= mhip->mhi_hdrsize;

		/*
		 * Certain MAC type plugins provide an illusion for raw
		 * DLPI consumers.  They pretend that the MAC layer is
		 * something that it's not for the benefit of observability
		 * tools.  For example, mac_wifi pretends that it's Ethernet
		 * for such consumers.	Here, unless native mode is enabled,
		 * we call into the MAC layer so that this illusion can be
		 * maintained.	The plugin will optionally transform the MAC
		 * header here into something that can be passed up to raw
		 * consumers.  The header goes from "cooked" mode to raw mode.
		 */
		if (!dsp->ds_native) {
			newmp = mac_header_uncook(dsp->ds_mh, mp);
			if (newmp == NULL) {
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Strip the VLAN tag for VLAN streams.
		 */
		if (is_ethernet &&
		    mac_client_vid(dsp->ds_mch) != VLAN_ID_NONE) {
			/*
			 * The priority should be kept only for VLAN
			 * data-links.
			 */
			newmp = i_dld_ether_header_strip_tag(mp,
			    mac_client_is_vlan_vnic(dsp->ds_mch));
			if (newmp == NULL) {
				freemsg(mp);
				goto next;
			}
			mp = newmp;
		}

		/*
		 * Pass the packet on.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, mp);
		else
			freemsg(mp);

next:
		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * Fast-path receive function.
 *
 * Walks the b_next chain starting at mp. Each packet is sent upstream, with
 * b_rptr wound back over the VLAN tag when the rules below call for keeping
 * it, or freed if the upstream queue cannot accept it.
 */
/*ARGSUSED*/
void
dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t *dsp = (dld_str_t *)arg;
	mblk_t *next;
	size_t offset = 0;

	/*
	 * MAC header stripping rules:
	 *    - Tagged packets:
	 *	a. VLAN streams. Strip the whole VLAN header including the tag.
	 *	b. Physical streams
	 *	- VLAN packets (non-zero VID). The stream must be either a
	 *	  DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener.
	 *	  Strip the Ethernet header but keep the VLAN header.
	 *	- Special tagged packets (zero VID)
	 *	  * The stream is either a DL_PROMISC_SAP listener or a
	 *	    ETHERTYPE_VLAN listener, strip the Ethernet header but
	 *	    keep the VLAN header.
	 *	  * Otherwise, strip the whole VLAN header.
	 *    - Untagged packets. Strip the whole MAC header.
	 */
	if (mhip->mhi_istagged &&
	    (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
	    ((dsp->ds_sap == ETHERTYPE_VLAN) ||
	    (dsp->ds_promisc & DLS_PROMISC_SAP))) {
		offset = VLAN_TAGSZ;
	}

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the VLAN header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + offset);
		mp->b_rptr -= offset;

		/*
		 * Pass the packet on.
		 */
		if (canputnext(dsp->ds_rq))
			putnext(dsp->ds_rq, mp);
		else
			freemsg(mp);
		/*
		 * Move on to the next packet in the chain.
		 */
		mp = next;
	} while (mp != NULL);
}

/*
 * Default receive function (send DL_UNITDATA_IND messages).
 */
/*ARGSUSED*/
void
dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    mac_header_info_t *mhip)
{
	dld_str_t		*dsp = (dld_str_t *)arg;
	mblk_t			*ud_mp;
	mblk_t			*next;
	size_t			offset = 0;
	boolean_t		strip_vlan = B_TRUE;

	/*
	 * See MAC header stripping rules in the dld_str_rx_fastpath() function.
	 */
	if (mhip->mhi_istagged &&
	    (mac_client_vid(dsp->ds_mch) == VLAN_ID_NONE) &&
	    ((dsp->ds_sap == ETHERTYPE_VLAN) ||
	    (dsp->ds_promisc & DLS_PROMISC_SAP))) {
		offset = VLAN_TAGSZ;
		strip_vlan = B_FALSE;
	}

	ASSERT(mp != NULL);
	do {
		/*
		 * Get the pointer to the next packet in the chain and then
		 * clear b_next before the packet gets passed on.
		 */
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * Wind back b_rptr to point at the MAC header.
		 */
		ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize);
		mp->b_rptr -= mhip->mhi_hdrsize;

		/*
		 * Create the DL_UNITDATA_IND M_PROTO.
		 */
		if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) {
			/*
			 * NOTE(review): b_next was cleared above, so mp is no
			 * longer linked to 'next' here -- confirm whether the
			 * rest of the chain is meant to be released on this
			 * path as well.
			 */
			freemsgchain(mp);
			return;
		}

		/*
		 * Advance b_rptr to point at the payload (or the VLAN header).
		 */
		mp->b_rptr += (mhip->mhi_hdrsize - offset);

		/*
		 * Prepend the DL_UNITDATA_IND.
		 */
		ud_mp->b_cont = mp;

		/*
		 * Send the message.
1329 */ 1330 if (canputnext(dsp->ds_rq)) 1331 putnext(dsp->ds_rq, ud_mp); 1332 else 1333 freemsg(ud_mp); 1334 1335 /* 1336 * Move on to the next packet in the chain. 1337 */ 1338 mp = next; 1339 } while (mp != NULL); 1340 } 1341 1342 /* 1343 * DL_NOTIFY_IND: DL_NOTE_SDU_SIZE 1344 */ 1345 static void 1346 str_notify_sdu_size(dld_str_t *dsp, uint_t max_sdu) 1347 { 1348 mblk_t *mp; 1349 dl_notify_ind_t *dlip; 1350 1351 if (!(dsp->ds_notifications & DL_NOTE_SDU_SIZE)) 1352 return; 1353 1354 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1355 M_PROTO, 0)) == NULL) 1356 return; 1357 1358 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1359 dlip = (dl_notify_ind_t *)mp->b_rptr; 1360 dlip->dl_primitive = DL_NOTIFY_IND; 1361 dlip->dl_notification = DL_NOTE_SDU_SIZE; 1362 dlip->dl_data = max_sdu; 1363 1364 qreply(dsp->ds_wq, mp); 1365 } 1366 1367 /* 1368 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1369 * current state of the interface. 1370 */ 1371 void 1372 dld_str_notify_ind(dld_str_t *dsp) 1373 { 1374 mac_notify_type_t type; 1375 1376 for (type = 0; type < MAC_NNOTE; type++) 1377 str_notify(dsp, type); 1378 } 1379 1380 typedef struct dl_unitdata_ind_wrapper { 1381 dl_unitdata_ind_t dl_unitdata; 1382 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1383 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1384 } dl_unitdata_ind_wrapper_t; 1385 1386 /* 1387 * Create a DL_UNITDATA_IND M_PROTO message. 1388 */ 1389 static mblk_t * 1390 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan) 1391 { 1392 mblk_t *nmp; 1393 dl_unitdata_ind_wrapper_t *dlwp; 1394 dl_unitdata_ind_t *dlp; 1395 mac_header_info_t mhi; 1396 uint_t addr_length; 1397 uint8_t *daddr; 1398 uint8_t *saddr; 1399 1400 /* 1401 * Get the packet header information. 
1402 */ 1403 if (dls_link_header_info(dsp->ds_dlp, mp, &mhi) != 0) 1404 return (NULL); 1405 1406 /* 1407 * Allocate a message large enough to contain the wrapper structure 1408 * defined above. 1409 */ 1410 if ((nmp = mexchange(dsp->ds_wq, NULL, 1411 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1412 DL_UNITDATA_IND)) == NULL) 1413 return (NULL); 1414 1415 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1416 1417 dlp = &(dlwp->dl_unitdata); 1418 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1419 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1420 1421 /* 1422 * Copy in the destination address. 1423 */ 1424 addr_length = dsp->ds_mip->mi_addr_length; 1425 daddr = dlwp->dl_dest_addr; 1426 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1427 bcopy(mhi.mhi_daddr, daddr, addr_length); 1428 1429 /* 1430 * Set the destination DLSAP to the SAP value encoded in the packet. 1431 */ 1432 if (mhi.mhi_istagged && !strip_vlan) 1433 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN; 1434 else 1435 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap; 1436 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1437 1438 /* 1439 * If the destination address was multicast or broadcast then the 1440 * dl_group_address field should be non-zero. 1441 */ 1442 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) || 1443 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST); 1444 1445 /* 1446 * Copy in the source address if one exists. Some MAC types (DL_IB 1447 * for example) may not have access to source information. 1448 */ 1449 if (mhi.mhi_saddr == NULL) { 1450 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0; 1451 } else { 1452 saddr = dlwp->dl_src_addr; 1453 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1454 bcopy(mhi.mhi_saddr, saddr, addr_length); 1455 1456 /* 1457 * Set the source DLSAP to the packet ethertype. 
1458 */ 1459 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap; 1460 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1461 } 1462 1463 return (nmp); 1464 } 1465 1466 /* 1467 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1468 */ 1469 static void 1470 str_notify_promisc_on_phys(dld_str_t *dsp) 1471 { 1472 mblk_t *mp; 1473 dl_notify_ind_t *dlip; 1474 1475 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1476 return; 1477 1478 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1479 M_PROTO, 0)) == NULL) 1480 return; 1481 1482 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1483 dlip = (dl_notify_ind_t *)mp->b_rptr; 1484 dlip->dl_primitive = DL_NOTIFY_IND; 1485 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1486 1487 qreply(dsp->ds_wq, mp); 1488 } 1489 1490 /* 1491 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1492 */ 1493 static void 1494 str_notify_promisc_off_phys(dld_str_t *dsp) 1495 { 1496 mblk_t *mp; 1497 dl_notify_ind_t *dlip; 1498 1499 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1500 return; 1501 1502 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1503 M_PROTO, 0)) == NULL) 1504 return; 1505 1506 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1507 dlip = (dl_notify_ind_t *)mp->b_rptr; 1508 dlip->dl_primitive = DL_NOTIFY_IND; 1509 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1510 1511 qreply(dsp->ds_wq, mp); 1512 } 1513 1514 /* 1515 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1516 */ 1517 static void 1518 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1519 { 1520 mblk_t *mp; 1521 dl_notify_ind_t *dlip; 1522 uint_t addr_length; 1523 uint16_t ethertype; 1524 1525 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1526 return; 1527 1528 addr_length = dsp->ds_mip->mi_addr_length; 1529 if ((mp = mexchange(dsp->ds_wq, NULL, 1530 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1531 M_PROTO, 0)) == NULL) 1532 return; 1533 1534 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1535 dlip = (dl_notify_ind_t 
*)mp->b_rptr; 1536 dlip->dl_primitive = DL_NOTIFY_IND; 1537 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1538 dlip->dl_data = DL_CURR_PHYS_ADDR; 1539 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1540 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1541 1542 bcopy(addr, &dlip[1], addr_length); 1543 1544 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1545 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype; 1546 1547 qreply(dsp->ds_wq, mp); 1548 } 1549 1550 /* 1551 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1552 */ 1553 static void 1554 str_notify_link_up(dld_str_t *dsp) 1555 { 1556 mblk_t *mp; 1557 dl_notify_ind_t *dlip; 1558 1559 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1560 return; 1561 1562 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1563 M_PROTO, 0)) == NULL) 1564 return; 1565 1566 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1567 dlip = (dl_notify_ind_t *)mp->b_rptr; 1568 dlip->dl_primitive = DL_NOTIFY_IND; 1569 dlip->dl_notification = DL_NOTE_LINK_UP; 1570 1571 qreply(dsp->ds_wq, mp); 1572 } 1573 1574 /* 1575 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1576 */ 1577 static void 1578 str_notify_link_down(dld_str_t *dsp) 1579 { 1580 mblk_t *mp; 1581 dl_notify_ind_t *dlip; 1582 1583 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1584 return; 1585 1586 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1587 M_PROTO, 0)) == NULL) 1588 return; 1589 1590 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1591 dlip = (dl_notify_ind_t *)mp->b_rptr; 1592 dlip->dl_primitive = DL_NOTIFY_IND; 1593 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1594 1595 qreply(dsp->ds_wq, mp); 1596 } 1597 1598 /* 1599 * DL_NOTIFY_IND: DL_NOTE_SPEED 1600 */ 1601 static void 1602 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1603 { 1604 mblk_t *mp; 1605 dl_notify_ind_t *dlip; 1606 1607 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1608 return; 1609 1610 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1611 M_PROTO, 0)) == 
NULL) 1612 return; 1613 1614 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1615 dlip = (dl_notify_ind_t *)mp->b_rptr; 1616 dlip->dl_primitive = DL_NOTIFY_IND; 1617 dlip->dl_notification = DL_NOTE_SPEED; 1618 dlip->dl_data = speed; 1619 1620 qreply(dsp->ds_wq, mp); 1621 } 1622 1623 /* 1624 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1625 */ 1626 static void 1627 str_notify_capab_reneg(dld_str_t *dsp) 1628 { 1629 mblk_t *mp; 1630 dl_notify_ind_t *dlip; 1631 1632 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1633 return; 1634 1635 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1636 M_PROTO, 0)) == NULL) 1637 return; 1638 1639 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1640 dlip = (dl_notify_ind_t *)mp->b_rptr; 1641 dlip->dl_primitive = DL_NOTIFY_IND; 1642 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1643 1644 qreply(dsp->ds_wq, mp); 1645 } 1646 1647 /* 1648 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH 1649 */ 1650 static void 1651 str_notify_fastpath_flush(dld_str_t *dsp) 1652 { 1653 mblk_t *mp; 1654 dl_notify_ind_t *dlip; 1655 1656 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH)) 1657 return; 1658 1659 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1660 M_PROTO, 0)) == NULL) 1661 return; 1662 1663 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1664 dlip = (dl_notify_ind_t *)mp->b_rptr; 1665 dlip->dl_primitive = DL_NOTIFY_IND; 1666 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH; 1667 1668 qreply(dsp->ds_wq, mp); 1669 } 1670 1671 /* 1672 * MAC notification callback. 1673 */ 1674 void 1675 str_notify(void *arg, mac_notify_type_t type) 1676 { 1677 dld_str_t *dsp = (dld_str_t *)arg; 1678 queue_t *q = dsp->ds_wq; 1679 mac_handle_t mh = dsp->ds_mh; 1680 mac_client_handle_t mch = dsp->ds_mch; 1681 uint8_t addr[MAXMACADDRLEN]; 1682 1683 switch (type) { 1684 case MAC_NOTE_TX: 1685 qenable(q); 1686 break; 1687 1688 case MAC_NOTE_DEVPROMISC: 1689 /* 1690 * Send the appropriate DL_NOTIFY_IND. 
1691 */ 1692 if (mac_promisc_get(mh)) 1693 str_notify_promisc_on_phys(dsp); 1694 else 1695 str_notify_promisc_off_phys(dsp); 1696 break; 1697 1698 case MAC_NOTE_UNICST: 1699 /* 1700 * This notification is sent whenever the MAC unicast 1701 * address changes. 1702 */ 1703 mac_unicast_primary_get(mh, addr); 1704 1705 /* 1706 * Send the appropriate DL_NOTIFY_IND. 1707 */ 1708 str_notify_phys_addr(dsp, addr); 1709 break; 1710 1711 case MAC_NOTE_LINK: 1712 /* 1713 * This notification is sent every time the MAC driver 1714 * updates the link state. 1715 */ 1716 switch (mac_client_stat_get(mch, MAC_STAT_LINK_STATE)) { 1717 case LINK_STATE_UP: { 1718 uint64_t speed; 1719 /* 1720 * The link is up so send the appropriate 1721 * DL_NOTIFY_IND. 1722 */ 1723 str_notify_link_up(dsp); 1724 1725 speed = mac_stat_get(mh, MAC_STAT_IFSPEED); 1726 str_notify_speed(dsp, (uint32_t)(speed / 1000ull)); 1727 break; 1728 } 1729 case LINK_STATE_DOWN: 1730 /* 1731 * The link is down so send the appropriate 1732 * DL_NOTIFY_IND. 1733 */ 1734 str_notify_link_down(dsp); 1735 break; 1736 1737 default: 1738 break; 1739 } 1740 break; 1741 1742 case MAC_NOTE_CAPAB_CHG: 1743 /* 1744 * This notification is sent whenever the MAC resources 1745 * change or capabilities change. We need to renegotiate 1746 * the capabilities. Send the appropriate DL_NOTIFY_IND. 1747 */ 1748 str_notify_capab_reneg(dsp); 1749 break; 1750 1751 case MAC_NOTE_SDU_SIZE: { 1752 uint_t max_sdu; 1753 mac_sdu_get(dsp->ds_mh, NULL, &max_sdu); 1754 str_notify_sdu_size(dsp, max_sdu); 1755 break; 1756 } 1757 1758 case MAC_NOTE_FASTPATH_FLUSH: 1759 str_notify_fastpath_flush(dsp); 1760 break; 1761 1762 case MAC_NOTE_MARGIN: 1763 break; 1764 1765 default: 1766 ASSERT(B_FALSE); 1767 break; 1768 } 1769 } 1770 1771 /* 1772 * This function is called via a taskq mechansim to process all control 1773 * messages on a per 'dsp' end point. 
1774 */ 1775 static void 1776 dld_wput_nondata_task(void *arg) 1777 { 1778 dld_str_t *dsp = arg; 1779 mblk_t *mp; 1780 1781 mutex_enter(&dsp->ds_lock); 1782 while (dsp->ds_pending_head != NULL) { 1783 mp = dsp->ds_pending_head; 1784 dsp->ds_pending_head = mp->b_next; 1785 mp->b_next = NULL; 1786 if (dsp->ds_pending_head == NULL) 1787 dsp->ds_pending_tail = NULL; 1788 mutex_exit(&dsp->ds_lock); 1789 1790 switch (DB_TYPE(mp)) { 1791 case M_PROTO: 1792 case M_PCPROTO: 1793 dld_proto(dsp, mp); 1794 break; 1795 case M_IOCTL: 1796 dld_ioc(dsp, mp); 1797 break; 1798 default: 1799 ASSERT(0); 1800 } 1801 1802 mutex_enter(&dsp->ds_lock); 1803 } 1804 ASSERT(dsp->ds_pending_tail == NULL); 1805 dsp->ds_dlpi_pending = 0; 1806 cv_broadcast(&dsp->ds_dlpi_pending_cv); 1807 mutex_exit(&dsp->ds_lock); 1808 } 1809 1810 /* 1811 * Kernel thread to handle taskq dispatch failures in dld_wput_data. This 1812 * thread is started at boot time. 1813 */ 1814 static void 1815 dld_taskq_dispatch(void) 1816 { 1817 callb_cpr_t cprinfo; 1818 dld_str_t *dsp; 1819 1820 CALLB_CPR_INIT(&cprinfo, &dld_taskq_lock, callb_generic_cpr, 1821 "dld_taskq_dispatch"); 1822 mutex_enter(&dld_taskq_lock); 1823 1824 while (!dld_taskq_quit) { 1825 dsp = list_head(&dld_taskq_list); 1826 while (dsp != NULL) { 1827 list_remove(&dld_taskq_list, dsp); 1828 mutex_exit(&dld_taskq_lock); 1829 VERIFY(taskq_dispatch(dld_taskq, dld_wput_nondata_task, 1830 dsp, TQ_SLEEP) != 0); 1831 mutex_enter(&dld_taskq_lock); 1832 dsp = list_head(&dld_taskq_list); 1833 } 1834 1835 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1836 cv_wait(&dld_taskq_cv, &dld_taskq_lock); 1837 CALLB_CPR_SAFE_END(&cprinfo, &dld_taskq_lock); 1838 } 1839 1840 dld_taskq_done = B_TRUE; 1841 cv_signal(&dld_taskq_cv); 1842 CALLB_CPR_EXIT(&cprinfo); 1843 thread_exit(); 1844 } 1845 1846 /* 1847 * All control operations are serialized on the 'dsp' and are also funneled 1848 * through a taskq mechanism to ensure that subsequent processing has kernel 1849 * context and can safely use 
cv_wait. 1850 * 1851 * Mechanisms to handle taskq dispatch failures 1852 * 1853 * The only way to be sure that taskq dispatch does not fail is to either 1854 * specify TQ_SLEEP or to use a static taskq and prepopulate it with 1855 * some number of entries and make sure that the number of outstanding requests 1856 * are less than that number. We can't use TQ_SLEEP since we don't know the 1857 * context. Nor can we bound the total number of 'dsp' end points. So we are 1858 * unable to use either of the above schemes, and are forced to deal with 1859 * taskq dispatch failures. Note that even dynamic taskq could fail in 1860 * dispatch if TQ_NOSLEEP is specified, since this flag is translated 1861 * eventually to KM_NOSLEEP and kmem allocations could fail in the taskq 1862 * framework. 1863 * 1864 * We maintain a queue of 'dsp's that encountered taskq dispatch failure. 1865 * We also have a single global thread to retry the taskq dispatch. This 1866 * thread loops in 'dld_taskq_dispatch' and retries the taskq dispatch, but 1867 * uses TQ_SLEEP to ensure eventual success of the dispatch operation. 1868 */ 1869 static void 1870 dld_wput_nondata(dld_str_t *dsp, mblk_t *mp) 1871 { 1872 ASSERT(mp->b_next == NULL); 1873 mutex_enter(&dsp->ds_lock); 1874 if (dsp->ds_pending_head != NULL) { 1875 ASSERT(dsp->ds_dlpi_pending); 1876 dsp->ds_pending_tail->b_next = mp; 1877 dsp->ds_pending_tail = mp; 1878 mutex_exit(&dsp->ds_lock); 1879 return; 1880 } 1881 ASSERT(dsp->ds_pending_tail == NULL); 1882 dsp->ds_pending_head = dsp->ds_pending_tail = mp; 1883 /* 1884 * At this point if ds_dlpi_pending is set, it implies that the taskq 1885 * thread is still active and is processing the last message, though 1886 * the pending queue has been emptied. 
1887 */ 1888 if (dsp->ds_dlpi_pending) { 1889 mutex_exit(&dsp->ds_lock); 1890 return; 1891 } 1892 1893 dsp->ds_dlpi_pending = 1; 1894 mutex_exit(&dsp->ds_lock); 1895 1896 if (taskq_dispatch(dld_taskq, dld_wput_nondata_task, dsp, 1897 TQ_NOSLEEP) != 0) 1898 return; 1899 1900 mutex_enter(&dld_taskq_lock); 1901 list_insert_tail(&dld_taskq_list, dsp); 1902 cv_signal(&dld_taskq_cv); 1903 mutex_exit(&dld_taskq_lock); 1904 } 1905 1906 /* 1907 * Process an M_IOCTL message. 1908 */ 1909 static void 1910 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1911 { 1912 uint_t cmd; 1913 1914 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1915 ASSERT(dsp->ds_type == DLD_DLPI); 1916 1917 switch (cmd) { 1918 case DLIOCNATIVE: 1919 ioc_native(dsp, mp); 1920 break; 1921 case DLIOCMARGININFO: 1922 ioc_margin(dsp, mp); 1923 break; 1924 case DLIOCRAW: 1925 ioc_raw(dsp, mp); 1926 break; 1927 case DLIOCHDRINFO: 1928 ioc_fast(dsp, mp); 1929 break; 1930 default: 1931 ioc(dsp, mp); 1932 } 1933 } 1934 1935 /* 1936 * DLIOCNATIVE 1937 */ 1938 static void 1939 ioc_native(dld_str_t *dsp, mblk_t *mp) 1940 { 1941 queue_t *q = dsp->ds_wq; 1942 const mac_info_t *mip = dsp->ds_mip; 1943 1944 /* 1945 * Native mode can be enabled if it's disabled and if the 1946 * native media type is different. 
1947 */ 1948 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia) 1949 dsp->ds_native = B_TRUE; 1950 1951 if (dsp->ds_native) 1952 miocack(q, mp, 0, mip->mi_nativemedia); 1953 else 1954 miocnak(q, mp, 0, ENOTSUP); 1955 } 1956 1957 /* 1958 * DLIOCMARGININFO 1959 */ 1960 static void 1961 ioc_margin(dld_str_t *dsp, mblk_t *mp) 1962 { 1963 queue_t *q = dsp->ds_wq; 1964 uint32_t margin; 1965 int err; 1966 1967 if (dsp->ds_dlstate == DL_UNATTACHED) { 1968 err = EINVAL; 1969 goto failed; 1970 } 1971 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0) 1972 goto failed; 1973 1974 mac_margin_get(dsp->ds_mh, &margin); 1975 *((uint32_t *)mp->b_cont->b_rptr) = margin; 1976 miocack(q, mp, sizeof (uint32_t), 0); 1977 return; 1978 1979 failed: 1980 miocnak(q, mp, 0, err); 1981 } 1982 1983 /* 1984 * DLIOCRAW 1985 */ 1986 static void 1987 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1988 { 1989 queue_t *q = dsp->ds_wq; 1990 mac_perim_handle_t mph; 1991 1992 if (dsp->ds_mh == NULL) { 1993 dsp->ds_mode = DLD_RAW; 1994 miocack(q, mp, 0, 0); 1995 return; 1996 } 1997 1998 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 1999 if (dsp->ds_polling || dsp->ds_direct) { 2000 mac_perim_exit(mph); 2001 miocnak(q, mp, 0, EPROTO); 2002 return; 2003 } 2004 2005 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 2006 /* 2007 * Set the receive callback. 2008 */ 2009 dls_rx_set(dsp, dld_str_rx_raw, dsp); 2010 } 2011 2012 /* 2013 * Note that raw mode is enabled. 
2014 */ 2015 dsp->ds_mode = DLD_RAW; 2016 mac_perim_exit(mph); 2017 2018 miocack(q, mp, 0, 0); 2019 } 2020 2021 /* 2022 * DLIOCHDRINFO 2023 */ 2024 static void 2025 ioc_fast(dld_str_t *dsp, mblk_t *mp) 2026 { 2027 dl_unitdata_req_t *dlp; 2028 off_t off; 2029 size_t len; 2030 const uint8_t *addr; 2031 uint16_t sap; 2032 mblk_t *nmp; 2033 mblk_t *hmp; 2034 uint_t addr_length; 2035 queue_t *q = dsp->ds_wq; 2036 int err; 2037 mac_perim_handle_t mph; 2038 2039 if (dld_opt & DLD_OPT_NO_FASTPATH) { 2040 err = ENOTSUP; 2041 goto failed; 2042 } 2043 2044 /* 2045 * DLIOCHDRINFO should only come from IP. The one initiated from 2046 * user-land should not be allowed. 2047 */ 2048 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) { 2049 err = EINVAL; 2050 goto failed; 2051 } 2052 2053 nmp = mp->b_cont; 2054 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 2055 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 2056 dlp->dl_primitive != DL_UNITDATA_REQ)) { 2057 err = EINVAL; 2058 goto failed; 2059 } 2060 2061 off = dlp->dl_dest_addr_offset; 2062 len = dlp->dl_dest_addr_length; 2063 2064 if (!MBLKIN(nmp, off, len)) { 2065 err = EINVAL; 2066 goto failed; 2067 } 2068 2069 if (dsp->ds_dlstate != DL_IDLE) { 2070 err = ENOTSUP; 2071 goto failed; 2072 } 2073 2074 addr_length = dsp->ds_mip->mi_addr_length; 2075 if (len != addr_length + sizeof (uint16_t)) { 2076 err = EINVAL; 2077 goto failed; 2078 } 2079 2080 addr = nmp->b_rptr + off; 2081 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 2082 2083 if ((hmp = dls_header(dsp, addr, sap, 0, NULL)) == NULL) { 2084 err = ENOMEM; 2085 goto failed; 2086 } 2087 2088 /* 2089 * This ioctl might happen concurrently with a direct call to dld_capab 2090 * that tries to enable direct and/or poll capabilities. Since the 2091 * stack does not serialize them, we do so here to avoid mixing 2092 * the callbacks. 
2093 */ 2094 mac_perim_enter_by_mh(dsp->ds_mh, &mph); 2095 if (dsp->ds_mode != DLD_FASTPATH) { 2096 /* 2097 * Set the receive callback (unless polling is enabled). 2098 */ 2099 if (!dsp->ds_polling && !dsp->ds_direct) 2100 dls_rx_set(dsp, dld_str_rx_fastpath, dsp); 2101 2102 /* 2103 * Note that fast-path mode is enabled. 2104 */ 2105 dsp->ds_mode = DLD_FASTPATH; 2106 } 2107 mac_perim_exit(mph); 2108 2109 freemsg(nmp->b_cont); 2110 nmp->b_cont = hmp; 2111 2112 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 2113 return; 2114 failed: 2115 miocnak(q, mp, 0, err); 2116 } 2117 2118 /* 2119 * Catch-all handler. 2120 */ 2121 static void 2122 ioc(dld_str_t *dsp, mblk_t *mp) 2123 { 2124 queue_t *q = dsp->ds_wq; 2125 2126 if (dsp->ds_dlstate == DL_UNATTACHED) { 2127 miocnak(q, mp, 0, EINVAL); 2128 return; 2129 } 2130 mac_ioctl(dsp->ds_mh, q, mp); 2131 } 2132