/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * Data-Link Driver
 */

#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/atomic.h>
#include <sys/disp.h>
#include <sys/callb.h>
#include <sys/vlan.h>
#include <sys/dld.h>
#include <sys/dld_impl.h>
#include <sys/dls_impl.h>
#include <inet/common.h>

/*
 * Forward declarations: kmem cache callbacks, receive-side indication
 * helper and MAC notification handlers.
 */
static int str_constructor(void *, void *, int);
static void str_destructor(void *, void *);
static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *, boolean_t);
static void str_notify_promisc_on_phys(dld_str_t *);
static void str_notify_promisc_off_phys(dld_str_t *);
static void str_notify_phys_addr(dld_str_t *, const uint8_t *);
static void str_notify_link_up(dld_str_t *);
static void str_notify_link_down(dld_str_t *);
static void str_notify_capab_reneg(dld_str_t *);
static void str_notify_speed(dld_str_t *, uint32_t);
static void str_notify(void *, mac_notify_type_t);

/*
 * Forward declarations: ioctl handlers, transmit queueing, the non-data
 * (control) message path and the Ethernet VLAN header helpers.
 */
static void ioc_native(dld_str_t *, mblk_t *);
static void ioc_margin(dld_str_t *, mblk_t *);
static void ioc_raw(dld_str_t *, mblk_t *);
static void ioc_fast(dld_str_t *, mblk_t *);
static void ioc(dld_str_t *, mblk_t *);
static void dld_tx_enqueue(dld_str_t *, mblk_t *, mblk_t *, boolean_t,
    uint_t, uint_t);
static void dld_wput_nondata(dld_str_t *, mblk_t *);
static void dld_wput_nondata_task(void *);
static void dld_flush_nondata(dld_str_t *);
static mblk_t *i_dld_ether_header_update_tag(mblk_t *, uint_t, uint16_t);
static mblk_t *i_dld_ether_header_strip_tag(mblk_t *);

/* Number of dld_str_t objects currently in use; updated atomically. */
static uint32_t str_count;
/* kmem cache that dld_str_t objects are allocated from. */
static kmem_cache_t *str_cachep;
/* Task queue created by dld_str_init() to process DLPI requests. */
static taskq_t *dld_disp_taskq = NULL;
/* Hash of all dld_str_t objects, keyed by ds_minor (see dld_str_init()). */
static mod_hash_t *str_hashp;

/* Size argument handed to mod_hash_create_idhash() for str_hashp. */
#define STR_HASHSZ 64
/* Convert an integral key (a minor number) into a mod_hash_key_t. */
#define STR_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key))

/* Defined elsewhere in this file; dld_tx_single() uses it as a size. */
static inline uint_t mp_getsize(mblk_t *);

/*
 * Interval to count the TX queued depth. Default is 1s (1000000us).
 * Count the queue depth immediately (not by timeout) if this is set to 0.
 * See more details above dld_tx_enqueue().
 */
uint_t tx_qdepth_interval = 1000000;

/*
 * Some notes on entry points, flow-control, queueing and locking:
 *
 * This driver exports the traditional STREAMS put entry point as well as
 * the non-STREAMS fast-path transmit routine which is provided to IP via
 * the DL_CAPAB_POLL negotiation. The put procedure handles all control
 * and data operations, while the fast-path routine deals only with M_DATA
 * fast-path packets. Regardless of the entry point, all outbound packets
 * will end up in dld_tx_single(), where they will be delivered to the MAC
 * driver.
 *
 * The transmit logic operates in two modes: a "not busy" mode where the
 * packets will be delivered to the MAC for a send attempt, or "busy" mode
 * where they will be enqueued in the internal queue because of flow-control.
 * Flow-control happens when the MAC driver indicates the packets couldn't
 * be transmitted due to lack of resources (e.g. running out of descriptors).
 * In such a case, the driver will place a dummy message on its write-side
 * STREAMS queue so that the queue is marked as "full". Any subsequent
 * packets arriving at the driver will be enqueued in the internal queue,
 * which is drained in the context of the service thread that gets scheduled
 * whenever the driver is in the "busy" mode. When all packets have been
 * successfully delivered by MAC and the internal queue is empty, it will
 * transition to the "not busy" mode by removing the dummy message from the
 * write-side STREAMS queue; in effect this will trigger backenabling.
 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due
 * to the above reasons.
 *
 * The driver implements an internal transmit queue independent of STREAMS.
 * This allows for flexibility and provides a fast enqueue/dequeue mechanism
 * compared to the putq() and getq() STREAMS interfaces. The only putq() and
 * getq() operations done by the driver are those related to placing and
 * removing the dummy message to/from the write-side STREAMS queue for flow-
 * control purposes.
 *
 * Locking is done independent of STREAMS due to the driver being fully MT.
 * Threads entering the driver (either from put or service entry points)
 * will most likely be readers, with the exception of a few writer cases
 * such as those handling DLPI attach/detach/bind/unbind/etc. or any of the
 * DLD-related ioctl requests. The DLPI detach case is special, because
 * it involves freeing resources and therefore must be single-threaded.
 * Unfortunately the readers/writers lock can't be used to protect against
 * it, because the lock is dropped prior to the driver calling places where
 * putnext() may be invoked, and such places may depend on those resources
 * to exist. Because of this, the driver always completes the DLPI detach
 * process when there are no other threads running in the driver. This is
 * done by keeping track of the number of threads, such that the last
 * thread leaving the driver will finish the pending DLPI detach operation.
 */

/*
 * dld_max_q_count is the queue depth threshold used to limit the number of
 * outstanding packets or bytes allowed in the queue; once this limit is
 * reached the driver will free any incoming ones until the queue depth
 * drops below the threshold.
 *
 * This buffering is provided to accommodate clients which do not employ
 * their own buffering scheme, and to handle occasional packet bursts.
 * Clients which handle their own buffering will receive positive feedback
 * from this driver as soon as it transitions into the "busy" state, i.e.
 * when the queue is initially filled up; they will get backenabled once
 * the queue is empty.
 *
 * The value chosen here is rather arbitrary; in future some intelligent
 * heuristics may be involved which could take into account the hardware's
 * transmit ring size, etc.
 */
uint_t dld_max_q_count = (16 * 1024 *1024);

/*
 * dld_finddevinfo() returns the dev_info_t * corresponding to a particular
 * dev_t. It searches str_hashp (a table of dld_str_t's) for streams that
 * match dev_t. If a stream is found and it is attached, its dev_info_t *
 * is returned.
159 */ 160 typedef struct i_dld_str_state_s { 161 major_t ds_major; 162 minor_t ds_minor; 163 dev_info_t *ds_dip; 164 } i_dld_str_state_t; 165 166 /* ARGSUSED */ 167 static uint_t 168 i_dld_str_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 169 { 170 i_dld_str_state_t *statep = arg; 171 dld_str_t *dsp = (dld_str_t *)val; 172 173 if (statep->ds_major != dsp->ds_major) 174 return (MH_WALK_CONTINUE); 175 176 ASSERT(statep->ds_minor != 0); 177 178 /* 179 * Access to ds_mh needs to be protected by ds_lock. 180 */ 181 rw_enter(&dsp->ds_lock, RW_READER); 182 if (statep->ds_minor == dsp->ds_minor) { 183 /* 184 * Clone: a clone minor is unique. we can terminate the 185 * walk if we find a matching stream -- even if we fail 186 * to obtain the devinfo. 187 */ 188 if (dsp->ds_mh != NULL) 189 statep->ds_dip = mac_devinfo_get(dsp->ds_mh); 190 rw_exit(&dsp->ds_lock); 191 return (MH_WALK_TERMINATE); 192 } 193 rw_exit(&dsp->ds_lock); 194 return (MH_WALK_CONTINUE); 195 } 196 197 static dev_info_t * 198 dld_finddevinfo(dev_t dev) 199 { 200 dev_info_t *dip; 201 i_dld_str_state_t state; 202 203 if (getminor(dev) == 0) 204 return (NULL); 205 206 /* 207 * See if it's a minor node of a link 208 */ 209 if ((dip = dls_finddevinfo(dev)) != NULL) 210 return (dip); 211 212 state.ds_minor = getminor(dev); 213 state.ds_major = getmajor(dev); 214 state.ds_dip = NULL; 215 216 mod_hash_walk(str_hashp, i_dld_str_walker, &state); 217 return (state.ds_dip); 218 } 219 220 /* 221 * devo_getinfo: getinfo(9e) 222 */ 223 /*ARGSUSED*/ 224 int 225 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 226 { 227 dev_info_t *devinfo; 228 minor_t minor = getminor((dev_t)arg); 229 int rc = DDI_FAILURE; 230 231 switch (cmd) { 232 case DDI_INFO_DEVT2DEVINFO: 233 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 234 *(dev_info_t **)resp = devinfo; 235 rc = DDI_SUCCESS; 236 } 237 break; 238 case DDI_INFO_DEVT2INSTANCE: 239 if (minor > 0 && minor <= DLS_MAX_MINOR) { 240 *resp = 
(void *)(uintptr_t)DLS_MINOR2INST(minor); 241 rc = DDI_SUCCESS; 242 } else if (minor > DLS_MAX_MINOR && 243 (devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 244 *resp = (void *)(uintptr_t)ddi_get_instance(devinfo); 245 rc = DDI_SUCCESS; 246 } 247 break; 248 } 249 return (rc); 250 } 251 252 /* 253 * qi_qopen: open(9e) 254 */ 255 /*ARGSUSED*/ 256 int 257 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 258 { 259 dld_str_t *dsp; 260 major_t major; 261 minor_t minor; 262 int err; 263 264 if (sflag == MODOPEN) 265 return (ENOTSUP); 266 267 /* 268 * This is a cloning driver and therefore each queue should only 269 * ever get opened once. 270 */ 271 if (rq->q_ptr != NULL) 272 return (EBUSY); 273 274 major = getmajor(*devp); 275 minor = getminor(*devp); 276 277 /* 278 * Create a new dld_str_t for the stream. This will grab a new minor 279 * number that will be handed back in the cloned dev_t. Creation may 280 * fail if we can't allocate the dummy mblk used for flow-control. 281 */ 282 dsp = dld_str_create(rq, DLD_DLPI, major, 283 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 284 if (dsp == NULL) 285 return (ENOSR); 286 287 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 288 if (minor != 0) { 289 /* 290 * Style 1 open 291 */ 292 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 293 goto failed; 294 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 295 } else { 296 (void) qassociate(rq, -1); 297 } 298 299 /* 300 * Enable the queue srv(9e) routine. 301 */ 302 qprocson(rq); 303 304 /* 305 * Construct a cloned dev_t to hand back. 306 */ 307 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 308 return (0); 309 310 failed: 311 dld_str_destroy(dsp); 312 return (err); 313 } 314 315 /* 316 * qi_qclose: close(9e) 317 */ 318 int 319 dld_close(queue_t *rq) 320 { 321 dld_str_t *dsp = rq->q_ptr; 322 323 /* 324 * Disable the queue srv(9e) routine. 325 */ 326 qprocsoff(rq); 327 328 dld_finish_pending_task(dsp); 329 330 /* 331 * This stream was open to a provider node. 
Check to see 332 * if it has been cleanly shut down. 333 */ 334 if (dsp->ds_dlstate != DL_UNATTACHED) { 335 /* 336 * The stream is either open to a style 1 provider or 337 * this is not clean shutdown. Detach from the PPA. 338 * (This is still ok even in the style 1 case). 339 */ 340 dld_str_detach(dsp); 341 } 342 343 dld_str_destroy(dsp); 344 return (0); 345 } 346 347 /* 348 * qi_qputp: put(9e) 349 */ 350 void 351 dld_wput(queue_t *wq, mblk_t *mp) 352 { 353 dld_str_t *dsp = wq->q_ptr; 354 355 switch (DB_TYPE(mp)) { 356 case M_DATA: { 357 dld_tx_t tx; 358 359 DLD_TX_ENTER(dsp); 360 if ((tx = dsp->ds_tx) != NULL) 361 tx(dsp, mp); 362 else 363 freemsg(mp); 364 DLD_TX_EXIT(dsp); 365 break; 366 } 367 case M_PROTO: 368 case M_PCPROTO: { 369 t_uscalar_t prim; 370 dld_tx_t tx; 371 372 if (MBLKL(mp) < sizeof (t_uscalar_t)) { 373 freemsg(mp); 374 return; 375 } 376 377 prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; 378 if (prim != DL_UNITDATA_REQ) { 379 /* Control path */ 380 dld_wput_nondata(dsp, mp); 381 break; 382 } 383 384 /* Data path */ 385 DLD_TX_ENTER(dsp); 386 if ((tx = dsp->ds_unitdata_tx) != NULL) 387 tx(dsp, mp); 388 else 389 dlerrorack(wq, mp, DL_UNITDATA_REQ, DL_OUTSTATE, 0); 390 DLD_TX_EXIT(dsp); 391 break; 392 } 393 case M_IOCTL: 394 case M_IOCDATA: 395 /* Control path */ 396 dld_wput_nondata(dsp, mp); 397 break; 398 case M_FLUSH: 399 /* 400 * Flush both the data messages and the control messages. 401 */ 402 if (*mp->b_rptr & FLUSHW) { 403 dld_flush_nondata(dsp); 404 dld_tx_flush(dsp); 405 *mp->b_rptr &= ~FLUSHW; 406 } 407 408 if (*mp->b_rptr & FLUSHR) { 409 qreply(wq, mp); 410 } else { 411 freemsg(mp); 412 } 413 break; 414 default: 415 freemsg(mp); 416 break; 417 } 418 } 419 420 /* 421 * Called by GLDv3 control node to process the ioctls. It will start 422 * a taskq to allow the ioctl processing to block. This is a temporary 423 * solution, and will be replaced by a more graceful approach afterwards. 
424 */ 425 void 426 dld_ioctl(queue_t *wq, mblk_t *mp) 427 { 428 dld_wput_nondata(wq->q_ptr, mp); 429 } 430 431 /* 432 * qi_srvp: srv(9e) 433 */ 434 void 435 dld_wsrv(queue_t *wq) 436 { 437 mblk_t *mp, *head, *tail; 438 dld_str_t *dsp = wq->q_ptr; 439 uint_t cnt, msgcnt; 440 timeout_id_t tid = 0; 441 442 rw_enter(&dsp->ds_lock, RW_READER); 443 /* 444 * Grab all packets (chained via b_next) off our transmit queue 445 * and try to send them all to the MAC layer. Since the queue 446 * is independent of streams, we are able to dequeue all messages 447 * at once without looping through getq() and manually chaining 448 * them. Note that the queue size parameters (byte and message 449 * counts) are cleared as well, but we postpone the backenabling 450 * until after the MAC transmit since some packets may end up 451 * back at our transmit queue. 452 */ 453 mutex_enter(&dsp->ds_tx_list_lock); 454 if ((mp = dsp->ds_tx_list_head) == NULL) { 455 ASSERT(!dsp->ds_tx_qbusy); 456 ASSERT(dsp->ds_tx_flow_mp != NULL); 457 ASSERT(dsp->ds_tx_list_head == NULL); 458 ASSERT(dsp->ds_tx_list_tail == NULL); 459 ASSERT(dsp->ds_tx_cnt == 0); 460 ASSERT(dsp->ds_tx_msgcnt == 0); 461 mutex_exit(&dsp->ds_tx_list_lock); 462 rw_exit(&dsp->ds_lock); 463 return; 464 } 465 head = mp; 466 tail = dsp->ds_tx_list_tail; 467 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 468 cnt = dsp->ds_tx_cnt; 469 msgcnt = dsp->ds_tx_msgcnt; 470 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 471 mutex_exit(&dsp->ds_tx_list_lock); 472 473 /* 474 * Discard packets unless we are attached and bound; note that 475 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 476 * because regardless of the mode all transmit will end up in 477 * dld_tx_single() where the packets may be queued. 478 */ 479 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_MULTIDATA)); 480 if (dsp->ds_dlstate != DL_IDLE) { 481 freemsgchain(mp); 482 goto done; 483 } 484 485 /* 486 * Attempt to transmit one or more packets. 
If the MAC can't 487 * send them all, re-queue the packet(s) at the beginning of 488 * the transmit queue to avoid any re-ordering. 489 */ 490 mp = dls_tx(dsp->ds_dc, mp); 491 if (mp == head) { 492 /* 493 * No message was sent out. Take the saved the queue depth 494 * as the input, so that dld_tx_enqueue() need not to 495 * calculate it again. 496 */ 497 dld_tx_enqueue(dsp, mp, tail, B_TRUE, msgcnt, cnt); 498 } else if (mp != NULL) { 499 /* 500 * Some but not all messages were sent out. dld_tx_enqueue() 501 * needs to start the timer to calculate the queue depth if 502 * timer has not been started. 503 * 504 * Note that a timer is used to calculate the queue depth 505 * to improve network performance, especially for TCP, in 506 * which case packets are sent without canput() being checked, 507 * and mostly end up in dld_tx_enqueue() under heavy load. 508 */ 509 dld_tx_enqueue(dsp, mp, tail, B_TRUE, 0, 0); 510 } 511 512 done: 513 /* 514 * Grab the list lock again and check if the transmit queue is 515 * really empty; if so, lift up flow-control and backenable any 516 * writer queues. If the queue is not empty, schedule service 517 * thread to drain it. 518 */ 519 mutex_enter(&dsp->ds_tx_list_lock); 520 if (dsp->ds_tx_list_head == NULL) { 521 dsp->ds_tx_flow_mp = getq(wq); 522 ASSERT(dsp->ds_tx_flow_mp != NULL); 523 dsp->ds_tx_qbusy = B_FALSE; 524 if ((tid = dsp->ds_tx_qdepth_tid) != 0) 525 dsp->ds_tx_qdepth_tid = 0; 526 } 527 mutex_exit(&dsp->ds_tx_list_lock); 528 529 /* 530 * Note that ds_tx_list_lock (which is acquired by the timeout 531 * callback routine) cannot be held across the call to untimeout(). 
532 */ 533 if (tid != 0) 534 (void) untimeout(tid); 535 536 rw_exit(&dsp->ds_lock); 537 } 538 539 void 540 dld_init_ops(struct dev_ops *ops, const char *name) 541 { 542 struct streamtab *stream; 543 struct qinit *rq, *wq; 544 struct module_info *modinfo; 545 546 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 547 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 548 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 549 modinfo->mi_minpsz = 0; 550 modinfo->mi_maxpsz = 64*1024; 551 modinfo->mi_hiwat = 1; 552 modinfo->mi_lowat = 0; 553 554 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 555 rq->qi_qopen = dld_open; 556 rq->qi_qclose = dld_close; 557 rq->qi_minfo = modinfo; 558 559 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 560 wq->qi_putp = (pfi_t)dld_wput; 561 wq->qi_srvp = (pfi_t)dld_wsrv; 562 wq->qi_minfo = modinfo; 563 564 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 565 stream->st_rdinit = rq; 566 stream->st_wrinit = wq; 567 ops->devo_cb_ops->cb_str = stream; 568 569 ops->devo_getinfo = &dld_getinfo; 570 } 571 572 void 573 dld_fini_ops(struct dev_ops *ops) 574 { 575 struct streamtab *stream; 576 struct qinit *rq, *wq; 577 struct module_info *modinfo; 578 579 stream = ops->devo_cb_ops->cb_str; 580 rq = stream->st_rdinit; 581 wq = stream->st_wrinit; 582 modinfo = rq->qi_minfo; 583 ASSERT(wq->qi_minfo == modinfo); 584 585 kmem_free(stream, sizeof (struct streamtab)); 586 kmem_free(wq, sizeof (struct qinit)); 587 kmem_free(rq, sizeof (struct qinit)); 588 kmem_free(modinfo->mi_idname, FMNAMESZ); 589 kmem_free(modinfo, sizeof (struct module_info)); 590 } 591 592 /* 593 * Initialize this module's data structures. 594 */ 595 void 596 dld_str_init(void) 597 { 598 /* 599 * Create dld_str_t object cache. 
600 */ 601 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 602 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 603 ASSERT(str_cachep != NULL); 604 605 /* 606 * Create taskq to process DLPI requests. 607 */ 608 dld_disp_taskq = taskq_create("dld_disp_taskq", 1024, MINCLSYSPRI, 2, 609 INT_MAX, TASKQ_DYNAMIC | TASKQ_PREPOPULATE); 610 611 /* 612 * Create a hash table for maintaining dld_str_t's. 613 * The ds_minor field (the clone minor number) of a dld_str_t 614 * is used as a key for this hash table because this number is 615 * globally unique (allocated from "dls_minor_arena"). 616 */ 617 str_hashp = mod_hash_create_idhash("dld_str_hash", STR_HASHSZ, 618 mod_hash_null_valdtor); 619 } 620 621 /* 622 * Tear down this module's data structures. 623 */ 624 int 625 dld_str_fini(void) 626 { 627 /* 628 * Make sure that there are no objects in use. 629 */ 630 if (str_count != 0) 631 return (EBUSY); 632 633 ASSERT(dld_disp_taskq != NULL); 634 taskq_destroy(dld_disp_taskq); 635 dld_disp_taskq = NULL; 636 637 /* 638 * Destroy object cache. 639 */ 640 kmem_cache_destroy(str_cachep); 641 mod_hash_destroy_idhash(str_hashp); 642 return (0); 643 } 644 645 /* 646 * Create a new dld_str_t object. 647 */ 648 dld_str_t * 649 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 650 { 651 dld_str_t *dsp; 652 int err; 653 654 /* 655 * Allocate an object from the cache. 656 */ 657 atomic_add_32(&str_count, 1); 658 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 659 660 /* 661 * Allocate the dummy mblk for flow-control. 662 */ 663 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 664 if (dsp->ds_tx_flow_mp == NULL) { 665 kmem_cache_free(str_cachep, dsp); 666 atomic_add_32(&str_count, -1); 667 return (NULL); 668 } 669 dsp->ds_type = type; 670 dsp->ds_major = major; 671 dsp->ds_style = style; 672 dsp->ds_tx = dsp->ds_unitdata_tx = NULL; 673 674 /* 675 * Initialize the queue pointers. 
676 */ 677 ASSERT(RD(rq) == rq); 678 dsp->ds_rq = rq; 679 dsp->ds_wq = WR(rq); 680 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 681 682 /* 683 * We want explicit control over our write-side STREAMS queue 684 * where the dummy mblk gets added/removed for flow-control. 685 */ 686 noenable(WR(rq)); 687 688 err = mod_hash_insert(str_hashp, STR_HASH_KEY(dsp->ds_minor), 689 (mod_hash_val_t)dsp); 690 ASSERT(err == 0); 691 return (dsp); 692 } 693 694 void 695 dld_finish_pending_task(dld_str_t *dsp) 696 { 697 /* 698 * Wait until the pending requests are processed by the worker thread. 699 */ 700 mutex_enter(&dsp->ds_disp_lock); 701 dsp->ds_closing = B_TRUE; 702 while (dsp->ds_tid != NULL) 703 cv_wait(&dsp->ds_disp_cv, &dsp->ds_disp_lock); 704 dsp->ds_closing = B_FALSE; 705 mutex_exit(&dsp->ds_disp_lock); 706 } 707 708 /* 709 * Destroy a dld_str_t object. 710 */ 711 void 712 dld_str_destroy(dld_str_t *dsp) 713 { 714 queue_t *rq; 715 queue_t *wq; 716 mod_hash_val_t val; 717 /* 718 * Clear the queue pointers. 719 */ 720 rq = dsp->ds_rq; 721 wq = dsp->ds_wq; 722 ASSERT(wq == WR(rq)); 723 724 rq->q_ptr = wq->q_ptr = NULL; 725 dsp->ds_rq = dsp->ds_wq = NULL; 726 727 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 728 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 729 ASSERT(dsp->ds_tx_list_head == NULL); 730 ASSERT(dsp->ds_tx_list_tail == NULL); 731 ASSERT(dsp->ds_tx_cnt == 0); 732 ASSERT(dsp->ds_tx_msgcnt == 0); 733 ASSERT(dsp->ds_tx_qdepth_tid == 0); 734 ASSERT(!dsp->ds_tx_qbusy); 735 736 ASSERT(MUTEX_NOT_HELD(&dsp->ds_disp_lock)); 737 ASSERT(dsp->ds_pending_head == NULL); 738 ASSERT(dsp->ds_pending_tail == NULL); 739 ASSERT(dsp->ds_tx == NULL); 740 ASSERT(dsp->ds_unitdata_tx == NULL); 741 742 /* 743 * Reinitialize all the flags. 744 */ 745 dsp->ds_notifications = 0; 746 dsp->ds_passivestate = DLD_UNINITIALIZED; 747 dsp->ds_mode = DLD_UNITDATA; 748 dsp->ds_native = B_FALSE; 749 750 /* 751 * Free the dummy mblk if exists. 
752 */ 753 if (dsp->ds_tx_flow_mp != NULL) { 754 freeb(dsp->ds_tx_flow_mp); 755 dsp->ds_tx_flow_mp = NULL; 756 } 757 758 (void) mod_hash_remove(str_hashp, STR_HASH_KEY(dsp->ds_minor), &val); 759 ASSERT(dsp == (dld_str_t *)val); 760 761 /* 762 * Free the object back to the cache. 763 */ 764 kmem_cache_free(str_cachep, dsp); 765 atomic_add_32(&str_count, -1); 766 } 767 768 /* 769 * kmem_cache contructor function: see kmem_cache_create(9f). 770 */ 771 /*ARGSUSED*/ 772 static int 773 str_constructor(void *buf, void *cdrarg, int kmflags) 774 { 775 dld_str_t *dsp = buf; 776 777 bzero(buf, sizeof (dld_str_t)); 778 779 /* 780 * Allocate a new minor number. 781 */ 782 if ((dsp->ds_minor = mac_minor_hold(kmflags == KM_SLEEP)) == 0) 783 return (-1); 784 785 /* 786 * Initialize the DLPI state machine. 787 */ 788 dsp->ds_dlstate = DL_UNATTACHED; 789 790 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 791 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 792 mutex_init(&dsp->ds_disp_lock, NULL, MUTEX_DRIVER, NULL); 793 cv_init(&dsp->ds_disp_cv, NULL, CV_DRIVER, NULL); 794 mutex_init(&dsp->ds_tx_lock, NULL, MUTEX_DRIVER, NULL); 795 cv_init(&dsp->ds_tx_cv, NULL, CV_DRIVER, NULL); 796 797 return (0); 798 } 799 800 /* 801 * kmem_cache destructor function. 802 */ 803 /*ARGSUSED*/ 804 static void 805 str_destructor(void *buf, void *cdrarg) 806 { 807 dld_str_t *dsp = buf; 808 809 /* 810 * Make sure the DLPI state machine was reset. 811 */ 812 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 813 814 /* 815 * Make sure the data-link interface was closed. 816 */ 817 ASSERT(dsp->ds_mh == NULL); 818 ASSERT(dsp->ds_dc == NULL); 819 ASSERT(dsp->ds_tx == NULL); 820 ASSERT(dsp->ds_unitdata_tx == NULL); 821 ASSERT(dsp->ds_intx_cnt == 0); 822 ASSERT(dsp->ds_detaching == B_FALSE); 823 824 /* 825 * Make sure enabled notifications are cleared. 826 */ 827 ASSERT(dsp->ds_notifications == 0); 828 829 /* 830 * Make sure polling is disabled. 
831 */ 832 ASSERT(!dsp->ds_polling); 833 834 /* 835 * Release the minor number. 836 */ 837 mac_minor_rele(dsp->ds_minor); 838 839 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 840 rw_destroy(&dsp->ds_lock); 841 842 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 843 mutex_destroy(&dsp->ds_tx_list_lock); 844 ASSERT(dsp->ds_tx_flow_mp == NULL); 845 ASSERT(dsp->ds_pending_head == NULL); 846 ASSERT(dsp->ds_pending_tail == NULL); 847 ASSERT(!dsp->ds_closing); 848 849 ASSERT(MUTEX_NOT_HELD(&dsp->ds_disp_lock)); 850 mutex_destroy(&dsp->ds_disp_lock); 851 cv_destroy(&dsp->ds_disp_cv); 852 853 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_lock)); 854 mutex_destroy(&dsp->ds_tx_lock); 855 cv_destroy(&dsp->ds_tx_cv); 856 } 857 858 void 859 dld_tx_single(dld_str_t *dsp, mblk_t *mp) 860 { 861 /* 862 * If we are busy enqueue the packet and return. 863 * Otherwise hand them over to the MAC driver for transmission. 864 * If the message didn't get sent it will be queued. 865 * 866 * Note here that we don't grab the list lock prior to checking 867 * the busy flag. This is okay, because a missed transition 868 * will not cause any packet reordering for any particular TCP 869 * connection (which is single-threaded). The enqueue routine 870 * will atomically set the busy flag and schedule the service 871 * thread to run; the flag is only cleared by the service thread 872 * when there is no more packet to be transmitted. 873 */ 874 875 if (dsp->ds_tx_qbusy || ((mp = dls_tx(dsp->ds_dc, mp)) != NULL)) 876 dld_tx_enqueue(dsp, mp, mp, B_FALSE, 1, mp_getsize(mp)); 877 } 878 879 /* 880 * Update the priority bits and VID (may need to insert tag if mp points 881 * to an untagged packet). 882 * If vid is VLAN_ID_NONE, use the VID encoded in the packet. 
883 */ 884 static mblk_t * 885 i_dld_ether_header_update_tag(mblk_t *mp, uint_t pri, uint16_t vid) 886 { 887 mblk_t *hmp; 888 struct ether_vlan_header *evhp; 889 struct ether_header *ehp; 890 uint16_t old_tci = 0; 891 size_t len; 892 893 ASSERT(pri != 0 || vid != VLAN_ID_NONE); 894 895 evhp = (struct ether_vlan_header *)mp->b_rptr; 896 if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 897 /* 898 * Tagged packet, update the priority bits. 899 */ 900 old_tci = ntohs(evhp->ether_tci); 901 len = sizeof (struct ether_vlan_header); 902 903 if ((DB_REF(mp) > 1) || (MBLKL(mp) < len)) { 904 /* 905 * In case some drivers only check the db_ref 906 * count of the first mblk, we pullup the 907 * message into a single mblk. 908 */ 909 hmp = msgpullup(mp, -1); 910 if ((hmp == NULL) || (MBLKL(hmp) < len)) { 911 freemsg(hmp); 912 return (NULL); 913 } else { 914 freemsg(mp); 915 mp = hmp; 916 } 917 } 918 919 evhp = (struct ether_vlan_header *)mp->b_rptr; 920 } else { 921 /* 922 * Untagged packet. Insert the special priority tag. 923 * First allocate a header mblk. 924 */ 925 hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 926 if (hmp == NULL) 927 return (NULL); 928 929 evhp = (struct ether_vlan_header *)hmp->b_rptr; 930 ehp = (struct ether_header *)mp->b_rptr; 931 932 /* 933 * Copy the MAC addresses and typelen 934 */ 935 bcopy(ehp, evhp, (ETHERADDRL * 2)); 936 evhp->ether_type = ehp->ether_type; 937 evhp->ether_tpid = htons(ETHERTYPE_VLAN); 938 939 hmp->b_wptr += sizeof (struct ether_vlan_header); 940 mp->b_rptr += sizeof (struct ether_header); 941 942 /* 943 * Free the original message if it's now empty. Link the 944 * rest of the messages to the header message. 
945 */ 946 if (MBLKL(mp) == 0) { 947 hmp->b_cont = mp->b_cont; 948 freeb(mp); 949 } else { 950 hmp->b_cont = mp; 951 } 952 mp = hmp; 953 } 954 955 if (pri == 0) 956 pri = VLAN_PRI(old_tci); 957 if (vid == VLAN_ID_NONE) 958 vid = VLAN_ID(old_tci); 959 evhp->ether_tci = htons(VLAN_TCI(pri, VLAN_CFI(old_tci), vid)); 960 return (mp); 961 } 962 963 /* 964 * M_DATA put 965 * 966 * The poll callback function for DLS clients which are not in the per-stream 967 * mode. This function is called from an upper layer protocol (currently only 968 * tcp and udp). 969 */ 970 void 971 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 972 { 973 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 974 mblk_t *newmp; 975 uint_t pri; 976 977 if (is_ethernet) { 978 /* 979 * Update the priority bits to the assigned priority. 980 */ 981 pri = (VLAN_MBLKPRI(mp) == 0) ? dsp->ds_pri : VLAN_MBLKPRI(mp); 982 983 if (pri != 0) { 984 newmp = i_dld_ether_header_update_tag(mp, pri, 985 VLAN_ID_NONE); 986 if (newmp == NULL) 987 goto discard; 988 mp = newmp; 989 } 990 } 991 992 dld_tx_single(dsp, mp); 993 return; 994 995 discard: 996 /* TODO: bump kstat? */ 997 freemsg(mp); 998 } 999 1000 /* 1001 * M_DATA put (DLIOCRAW mode). 1002 */ 1003 void 1004 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 1005 { 1006 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 1007 mblk_t *bp, *newmp; 1008 size_t size; 1009 mac_header_info_t mhi; 1010 uint_t pri, vid; 1011 1012 /* 1013 * Certain MAC type plugins provide an illusion for raw DLPI 1014 * consumers. They pretend that the MAC layer is something that 1015 * it's not for the benefit of observability tools. For example, 1016 * mac_wifi pretends that it's Ethernet for such consumers. 1017 * Here, unless native mode is enabled, we call into the MAC layer so 1018 * that this illusion can be maintained. The plugin will optionally 1019 * transform the MAC header here into something that can be passed 1020 * down. 
The header goes from raw mode to "cooked" mode. 1021 */ 1022 if (!dsp->ds_native) { 1023 if ((newmp = mac_header_cook(dsp->ds_mh, mp)) == NULL) 1024 goto discard; 1025 mp = newmp; 1026 } 1027 1028 size = MBLKL(mp); 1029 1030 /* 1031 * Check the packet is not too big and that any remaining 1032 * fragment list is composed entirely of M_DATA messages. (We 1033 * know the first fragment was M_DATA otherwise we could not 1034 * have got here). 1035 */ 1036 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 1037 if (DB_TYPE(bp) != M_DATA) 1038 goto discard; 1039 size += MBLKL(bp); 1040 } 1041 1042 if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0) 1043 goto discard; 1044 1045 /* 1046 * If LSO is enabled, check the size against lso_max. Otherwise, 1047 * compare the packet size with sdu_max. 1048 */ 1049 if (size > (dsp->ds_lso ? dsp->ds_lso_max : dsp->ds_mip->mi_sdu_max) 1050 + mhi.mhi_hdrsize) 1051 goto discard; 1052 1053 if (is_ethernet) { 1054 /* 1055 * Discard the packet if this is a VLAN stream but the VID in 1056 * the packet is not correct. 1057 */ 1058 vid = VLAN_ID(mhi.mhi_tci); 1059 if ((dsp->ds_vid != VLAN_ID_NONE) && (vid != VLAN_ID_NONE)) 1060 goto discard; 1061 1062 /* 1063 * Discard the packet if this packet is a tagged packet 1064 * but both pri and VID are 0. 1065 */ 1066 pri = VLAN_PRI(mhi.mhi_tci); 1067 if (mhi.mhi_istagged && (pri == 0) && (vid == VLAN_ID_NONE)) 1068 goto discard; 1069 1070 /* 1071 * Update the priority bits to the per-stream priority if 1072 * priority is not set in the packet. Update the VID for 1073 * packets on a VLAN stream. 1074 */ 1075 pri = (pri == 0) ? dsp->ds_pri : 0; 1076 if ((pri != 0) || (dsp->ds_vid != VLAN_ID_NONE)) { 1077 if ((newmp = i_dld_ether_header_update_tag(mp, 1078 pri, dsp->ds_vid)) == NULL) { 1079 goto discard; 1080 } 1081 mp = newmp; 1082 } 1083 } 1084 1085 dld_tx_single(dsp, mp); 1086 return; 1087 1088 discard: 1089 /* TODO: bump kstat? 
*/ 1090 freemsg(mp); 1091 } 1092 1093 /* 1094 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 1095 */ 1096 int 1097 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 1098 { 1099 dev_t dev; 1100 int err; 1101 const char *drvname; 1102 dls_channel_t dc; 1103 uint_t addr_length; 1104 boolean_t qassociated = B_FALSE; 1105 1106 ASSERT(dsp->ds_dc == NULL); 1107 1108 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 1109 return (EINVAL); 1110 1111 /* 1112 * /dev node access. This will still be supported for backward 1113 * compatibility reason. 1114 */ 1115 if ((dsp->ds_style == DL_STYLE2) && (strcmp(drvname, "aggr") != 0) && 1116 (strcmp(drvname, "vnic") != 0)) { 1117 if (qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 1118 return (EINVAL); 1119 qassociated = B_TRUE; 1120 } 1121 1122 /* 1123 * Open a channel. 1124 */ 1125 if (dsp->ds_style == DL_STYLE2 && ppa > DLS_MAX_PPA) { 1126 /* 1127 * style-2 VLAN open, this is a /dev VLAN ppa open 1128 * which might result in a newly created dls_vlan_t. 1129 */ 1130 err = dls_open_style2_vlan(dsp->ds_major, ppa, &dc); 1131 if (err != 0) { 1132 if (qassociated) 1133 (void) qassociate(dsp->ds_wq, -1); 1134 return (err); 1135 } 1136 } else { 1137 dev = makedevice(dsp->ds_major, (minor_t)ppa + 1); 1138 if ((err = dls_open_by_dev(dev, &dc)) != 0) { 1139 if (qassociated) 1140 (void) qassociate(dsp->ds_wq, -1); 1141 return (err); 1142 } 1143 } 1144 1145 /* 1146 * Cache the MAC interface handle, a pointer to the immutable MAC 1147 * information and the current and 'factory' MAC address. 1148 */ 1149 dsp->ds_mh = dls_mac(dc); 1150 dsp->ds_mip = mac_info(dsp->ds_mh); 1151 1152 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1153 1154 addr_length = dsp->ds_mip->mi_addr_length; 1155 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 1156 1157 /* 1158 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 1159 * a non-VLAN interface). 
1160 */ 1161 dsp->ds_vid = dls_vid(dc); 1162 1163 /* 1164 * Set the default packet priority. 1165 */ 1166 dsp->ds_pri = 0; 1167 1168 /* 1169 * Add a notify function so that the we get updates from the MAC. 1170 */ 1171 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 1172 1173 dsp->ds_dc = dc; 1174 dsp->ds_dlstate = DL_UNBOUND; 1175 1176 return (0); 1177 } 1178 1179 /* 1180 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 1181 * from close(2) for style 2. 1182 */ 1183 void 1184 dld_str_detach(dld_str_t *dsp) 1185 { 1186 /* 1187 * Remove the notify function. 1188 */ 1189 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 1190 1191 /* 1192 * Disable the capabilities and clear the promisc flag. 1193 */ 1194 ASSERT(!dsp->ds_polling); 1195 ASSERT(!dsp->ds_soft_ring); 1196 dld_capabilities_disable(dsp); 1197 dsp->ds_promisc = 0; 1198 1199 DLD_TX_QUIESCE(dsp); 1200 1201 /* 1202 * Flush all pending packets which are sitting in the transmit queue. 1203 */ 1204 dld_tx_flush(dsp); 1205 1206 /* 1207 * Clear LSO flags. 1208 */ 1209 dsp->ds_lso = B_FALSE; 1210 dsp->ds_lso_max = 0; 1211 1212 dls_close(dsp->ds_dc); 1213 dsp->ds_dc = NULL; 1214 dsp->ds_mh = NULL; 1215 1216 if (dsp->ds_style == DL_STYLE2) 1217 (void) qassociate(dsp->ds_wq, -1); 1218 1219 /* 1220 * Re-initialize the DLPI state machine. 1221 */ 1222 dsp->ds_dlstate = DL_UNATTACHED; 1223 1224 } 1225 1226 /* 1227 * This function is only called for VLAN streams. In raw mode, we strip VLAN 1228 * tags before sending packets up to the DLS clients, with the exception of 1229 * special priority tagged packets, in that case, we set the VID to 0. 1230 * mp must be a VLAN tagged packet. 
1231 */ 1232 static mblk_t * 1233 i_dld_ether_header_strip_tag(mblk_t *mp) 1234 { 1235 mblk_t *newmp; 1236 struct ether_vlan_header *evhp; 1237 uint16_t tci, new_tci; 1238 1239 ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 1240 if (DB_REF(mp) > 1) { 1241 newmp = copymsg(mp); 1242 if (newmp == NULL) 1243 return (NULL); 1244 freemsg(mp); 1245 mp = newmp; 1246 } 1247 evhp = (struct ether_vlan_header *)mp->b_rptr; 1248 1249 tci = ntohs(evhp->ether_tci); 1250 if (VLAN_PRI(tci) == 0) { 1251 /* 1252 * Priority is 0, strip the tag. 1253 */ 1254 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 1255 mp->b_rptr += VLAN_TAGSZ; 1256 } else { 1257 /* 1258 * Priority is not 0, update the VID to 0. 1259 */ 1260 new_tci = VLAN_TCI(VLAN_PRI(tci), VLAN_CFI(tci), VLAN_ID_NONE); 1261 evhp->ether_tci = htons(new_tci); 1262 } 1263 return (mp); 1264 } 1265 1266 /* 1267 * Raw mode receive function. 1268 */ 1269 /*ARGSUSED*/ 1270 void 1271 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1272 mac_header_info_t *mhip) 1273 { 1274 dld_str_t *dsp = (dld_str_t *)arg; 1275 boolean_t is_ethernet = (dsp->ds_mip->mi_media == DL_ETHER); 1276 mblk_t *next, *newmp; 1277 1278 ASSERT(mp != NULL); 1279 do { 1280 /* 1281 * Get the pointer to the next packet in the chain and then 1282 * clear b_next before the packet gets passed on. 1283 */ 1284 next = mp->b_next; 1285 mp->b_next = NULL; 1286 1287 /* 1288 * Wind back b_rptr to point at the MAC header. 1289 */ 1290 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1291 mp->b_rptr -= mhip->mhi_hdrsize; 1292 1293 /* 1294 * Certain MAC type plugins provide an illusion for raw 1295 * DLPI consumers. They pretend that the MAC layer is 1296 * something that it's not for the benefit of observability 1297 * tools. For example, mac_wifi pretends that it's Ethernet 1298 * for such consumers. Here, unless native mode is enabled, 1299 * we call into the MAC layer so that this illusion can be 1300 * maintained. 
The plugin will optionally transform the MAC 1301 * header here into something that can be passed up to raw 1302 * consumers. The header goes from "cooked" mode to raw mode. 1303 */ 1304 if (!dsp->ds_native) { 1305 newmp = mac_header_uncook(dsp->ds_mh, mp); 1306 if (newmp == NULL) { 1307 freemsg(mp); 1308 goto next; 1309 } 1310 mp = newmp; 1311 } 1312 1313 /* 1314 * Strip the VLAN tag for VLAN streams. 1315 */ 1316 if (is_ethernet && dsp->ds_vid != VLAN_ID_NONE) { 1317 newmp = i_dld_ether_header_strip_tag(mp); 1318 if (newmp == NULL) { 1319 freemsg(mp); 1320 goto next; 1321 } 1322 mp = newmp; 1323 } 1324 1325 /* 1326 * Pass the packet on. 1327 */ 1328 if (canputnext(dsp->ds_rq)) 1329 putnext(dsp->ds_rq, mp); 1330 else 1331 freemsg(mp); 1332 1333 next: 1334 /* 1335 * Move on to the next packet in the chain. 1336 */ 1337 mp = next; 1338 } while (mp != NULL); 1339 } 1340 1341 /* 1342 * Fast-path receive function. 1343 */ 1344 /*ARGSUSED*/ 1345 void 1346 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1347 mac_header_info_t *mhip) 1348 { 1349 dld_str_t *dsp = (dld_str_t *)arg; 1350 mblk_t *next; 1351 size_t offset = 0; 1352 1353 /* 1354 * MAC header stripping rules: 1355 * - Tagged packets: 1356 * a. VLAN streams. Strip the whole VLAN header including the tag. 1357 * b. Physical streams 1358 * - VLAN packets (non-zero VID). The stream must be either a 1359 * DL_PROMISC_SAP listener or a ETHERTYPE_VLAN listener. 1360 * Strip the Ethernet header but keep the VLAN header. 1361 * - Special tagged packets (zero VID) 1362 * * The stream is either a DL_PROMISC_SAP listener or a 1363 * ETHERTYPE_VLAN listener, strip the Ethernet header but 1364 * keep the VLAN header. 1365 * * Otherwise, strip the whole VLAN header. 1366 * - Untagged packets. Strip the whole MAC header. 
1367 */ 1368 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1369 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1370 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1371 offset = VLAN_TAGSZ; 1372 } 1373 1374 ASSERT(mp != NULL); 1375 do { 1376 /* 1377 * Get the pointer to the next packet in the chain and then 1378 * clear b_next before the packet gets passed on. 1379 */ 1380 next = mp->b_next; 1381 mp->b_next = NULL; 1382 1383 /* 1384 * Wind back b_rptr to point at the VLAN header. 1385 */ 1386 ASSERT(mp->b_rptr >= DB_BASE(mp) + offset); 1387 mp->b_rptr -= offset; 1388 1389 /* 1390 * Pass the packet on. 1391 */ 1392 if (canputnext(dsp->ds_rq)) 1393 putnext(dsp->ds_rq, mp); 1394 else 1395 freemsg(mp); 1396 /* 1397 * Move on to the next packet in the chain. 1398 */ 1399 mp = next; 1400 } while (mp != NULL); 1401 } 1402 1403 /* 1404 * Default receive function (send DL_UNITDATA_IND messages). 1405 */ 1406 /*ARGSUSED*/ 1407 void 1408 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 1409 mac_header_info_t *mhip) 1410 { 1411 dld_str_t *dsp = (dld_str_t *)arg; 1412 mblk_t *ud_mp; 1413 mblk_t *next; 1414 size_t offset = 0; 1415 boolean_t strip_vlan = B_TRUE; 1416 1417 /* 1418 * See MAC header stripping rules in the dld_str_rx_fastpath() function. 1419 */ 1420 if (mhip->mhi_istagged && (dsp->ds_vid == VLAN_ID_NONE) && 1421 ((dsp->ds_sap == ETHERTYPE_VLAN) || 1422 (dsp->ds_promisc & DLS_PROMISC_SAP))) { 1423 offset = VLAN_TAGSZ; 1424 strip_vlan = B_FALSE; 1425 } 1426 1427 ASSERT(mp != NULL); 1428 do { 1429 /* 1430 * Get the pointer to the next packet in the chain and then 1431 * clear b_next before the packet gets passed on. 1432 */ 1433 next = mp->b_next; 1434 mp->b_next = NULL; 1435 1436 /* 1437 * Wind back b_rptr to point at the MAC header. 1438 */ 1439 ASSERT(mp->b_rptr >= DB_BASE(mp) + mhip->mhi_hdrsize); 1440 mp->b_rptr -= mhip->mhi_hdrsize; 1441 1442 /* 1443 * Create the DL_UNITDATA_IND M_PROTO. 
1444 */ 1445 if ((ud_mp = str_unitdata_ind(dsp, mp, strip_vlan)) == NULL) { 1446 freemsgchain(mp); 1447 return; 1448 } 1449 1450 /* 1451 * Advance b_rptr to point at the payload (or the VLAN header). 1452 */ 1453 mp->b_rptr += (mhip->mhi_hdrsize - offset); 1454 1455 /* 1456 * Prepend the DL_UNITDATA_IND. 1457 */ 1458 ud_mp->b_cont = mp; 1459 1460 /* 1461 * Send the message. 1462 */ 1463 if (canputnext(dsp->ds_rq)) 1464 putnext(dsp->ds_rq, ud_mp); 1465 else 1466 freemsg(ud_mp); 1467 1468 /* 1469 * Move on to the next packet in the chain. 1470 */ 1471 mp = next; 1472 } while (mp != NULL); 1473 } 1474 1475 /* 1476 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1477 * current state of the interface. 1478 */ 1479 void 1480 dld_str_notify_ind(dld_str_t *dsp) 1481 { 1482 mac_notify_type_t type; 1483 1484 for (type = 0; type < MAC_NNOTE; type++) 1485 str_notify(dsp, type); 1486 } 1487 1488 typedef struct dl_unitdata_ind_wrapper { 1489 dl_unitdata_ind_t dl_unitdata; 1490 uint8_t dl_dest_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1491 uint8_t dl_src_addr[MAXMACADDRLEN + sizeof (uint16_t)]; 1492 } dl_unitdata_ind_wrapper_t; 1493 1494 /* 1495 * Create a DL_UNITDATA_IND M_PROTO message. 1496 */ 1497 static mblk_t * 1498 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp, boolean_t strip_vlan) 1499 { 1500 mblk_t *nmp; 1501 dl_unitdata_ind_wrapper_t *dlwp; 1502 dl_unitdata_ind_t *dlp; 1503 mac_header_info_t mhi; 1504 uint_t addr_length; 1505 uint8_t *daddr; 1506 uint8_t *saddr; 1507 1508 /* 1509 * Get the packet header information. 1510 */ 1511 if (dls_header_info(dsp->ds_dc, mp, &mhi) != 0) 1512 return (NULL); 1513 1514 /* 1515 * Allocate a message large enough to contain the wrapper structure 1516 * defined above. 
1517 */ 1518 if ((nmp = mexchange(dsp->ds_wq, NULL, 1519 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1520 DL_UNITDATA_IND)) == NULL) 1521 return (NULL); 1522 1523 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1524 1525 dlp = &(dlwp->dl_unitdata); 1526 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1527 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1528 1529 /* 1530 * Copy in the destination address. 1531 */ 1532 addr_length = dsp->ds_mip->mi_addr_length; 1533 daddr = dlwp->dl_dest_addr; 1534 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1535 bcopy(mhi.mhi_daddr, daddr, addr_length); 1536 1537 /* 1538 * Set the destination DLSAP to the SAP value encoded in the packet. 1539 */ 1540 if (mhi.mhi_istagged && !strip_vlan) 1541 *(uint16_t *)(daddr + addr_length) = ETHERTYPE_VLAN; 1542 else 1543 *(uint16_t *)(daddr + addr_length) = mhi.mhi_bindsap; 1544 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1545 1546 /* 1547 * If the destination address was multicast or broadcast then the 1548 * dl_group_address field should be non-zero. 1549 */ 1550 dlp->dl_group_address = (mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) || 1551 (mhi.mhi_dsttype == MAC_ADDRTYPE_BROADCAST); 1552 1553 /* 1554 * Copy in the source address if one exists. Some MAC types (DL_IB 1555 * for example) may not have access to source information. 1556 */ 1557 if (mhi.mhi_saddr == NULL) { 1558 dlp->dl_src_addr_offset = dlp->dl_src_addr_length = 0; 1559 } else { 1560 saddr = dlwp->dl_src_addr; 1561 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1562 bcopy(mhi.mhi_saddr, saddr, addr_length); 1563 1564 /* 1565 * Set the source DLSAP to the packet ethertype. 
1566 */ 1567 *(uint16_t *)(saddr + addr_length) = mhi.mhi_origsap; 1568 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1569 } 1570 1571 return (nmp); 1572 } 1573 1574 /* 1575 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1576 */ 1577 static void 1578 str_notify_promisc_on_phys(dld_str_t *dsp) 1579 { 1580 mblk_t *mp; 1581 dl_notify_ind_t *dlip; 1582 1583 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1584 return; 1585 1586 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1587 M_PROTO, 0)) == NULL) 1588 return; 1589 1590 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1591 dlip = (dl_notify_ind_t *)mp->b_rptr; 1592 dlip->dl_primitive = DL_NOTIFY_IND; 1593 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1594 1595 qreply(dsp->ds_wq, mp); 1596 } 1597 1598 /* 1599 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1600 */ 1601 static void 1602 str_notify_promisc_off_phys(dld_str_t *dsp) 1603 { 1604 mblk_t *mp; 1605 dl_notify_ind_t *dlip; 1606 1607 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1608 return; 1609 1610 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1611 M_PROTO, 0)) == NULL) 1612 return; 1613 1614 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1615 dlip = (dl_notify_ind_t *)mp->b_rptr; 1616 dlip->dl_primitive = DL_NOTIFY_IND; 1617 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1618 1619 qreply(dsp->ds_wq, mp); 1620 } 1621 1622 /* 1623 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1624 */ 1625 static void 1626 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1627 { 1628 mblk_t *mp; 1629 dl_notify_ind_t *dlip; 1630 uint_t addr_length; 1631 uint16_t ethertype; 1632 1633 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1634 return; 1635 1636 addr_length = dsp->ds_mip->mi_addr_length; 1637 if ((mp = mexchange(dsp->ds_wq, NULL, 1638 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1639 M_PROTO, 0)) == NULL) 1640 return; 1641 1642 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1643 dlip = (dl_notify_ind_t 
*)mp->b_rptr; 1644 dlip->dl_primitive = DL_NOTIFY_IND; 1645 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1646 dlip->dl_data = DL_CURR_PHYS_ADDR; 1647 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1648 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1649 1650 bcopy(addr, &dlip[1], addr_length); 1651 1652 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1653 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = ethertype; 1654 1655 qreply(dsp->ds_wq, mp); 1656 } 1657 1658 /* 1659 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1660 */ 1661 static void 1662 str_notify_link_up(dld_str_t *dsp) 1663 { 1664 mblk_t *mp; 1665 dl_notify_ind_t *dlip; 1666 1667 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1668 return; 1669 1670 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1671 M_PROTO, 0)) == NULL) 1672 return; 1673 1674 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1675 dlip = (dl_notify_ind_t *)mp->b_rptr; 1676 dlip->dl_primitive = DL_NOTIFY_IND; 1677 dlip->dl_notification = DL_NOTE_LINK_UP; 1678 1679 qreply(dsp->ds_wq, mp); 1680 } 1681 1682 /* 1683 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1684 */ 1685 static void 1686 str_notify_link_down(dld_str_t *dsp) 1687 { 1688 mblk_t *mp; 1689 dl_notify_ind_t *dlip; 1690 1691 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1692 return; 1693 1694 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1695 M_PROTO, 0)) == NULL) 1696 return; 1697 1698 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1699 dlip = (dl_notify_ind_t *)mp->b_rptr; 1700 dlip->dl_primitive = DL_NOTIFY_IND; 1701 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1702 1703 qreply(dsp->ds_wq, mp); 1704 } 1705 1706 /* 1707 * DL_NOTIFY_IND: DL_NOTE_SPEED 1708 */ 1709 static void 1710 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1711 { 1712 mblk_t *mp; 1713 dl_notify_ind_t *dlip; 1714 1715 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1716 return; 1717 1718 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1719 M_PROTO, 0)) == 
NULL) 1720 return; 1721 1722 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1723 dlip = (dl_notify_ind_t *)mp->b_rptr; 1724 dlip->dl_primitive = DL_NOTIFY_IND; 1725 dlip->dl_notification = DL_NOTE_SPEED; 1726 dlip->dl_data = speed; 1727 1728 qreply(dsp->ds_wq, mp); 1729 } 1730 1731 /* 1732 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1733 */ 1734 static void 1735 str_notify_capab_reneg(dld_str_t *dsp) 1736 { 1737 mblk_t *mp; 1738 dl_notify_ind_t *dlip; 1739 1740 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1741 return; 1742 1743 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1744 M_PROTO, 0)) == NULL) 1745 return; 1746 1747 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1748 dlip = (dl_notify_ind_t *)mp->b_rptr; 1749 dlip->dl_primitive = DL_NOTIFY_IND; 1750 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1751 1752 qreply(dsp->ds_wq, mp); 1753 } 1754 1755 /* 1756 * DL_NOTIFY_IND: DL_NOTE_FASTPATH_FLUSH 1757 */ 1758 static void 1759 str_notify_fastpath_flush(dld_str_t *dsp) 1760 { 1761 mblk_t *mp; 1762 dl_notify_ind_t *dlip; 1763 1764 if (!(dsp->ds_notifications & DL_NOTE_FASTPATH_FLUSH)) 1765 return; 1766 1767 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1768 M_PROTO, 0)) == NULL) 1769 return; 1770 1771 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1772 dlip = (dl_notify_ind_t *)mp->b_rptr; 1773 dlip->dl_primitive = DL_NOTIFY_IND; 1774 dlip->dl_notification = DL_NOTE_FASTPATH_FLUSH; 1775 1776 qreply(dsp->ds_wq, mp); 1777 } 1778 1779 /* 1780 * MAC notification callback. 1781 */ 1782 static void 1783 str_notify(void *arg, mac_notify_type_t type) 1784 { 1785 dld_str_t *dsp = (dld_str_t *)arg; 1786 queue_t *q = dsp->ds_wq; 1787 1788 switch (type) { 1789 case MAC_NOTE_TX: 1790 qenable(q); 1791 break; 1792 1793 case MAC_NOTE_DEVPROMISC: 1794 /* 1795 * Send the appropriate DL_NOTIFY_IND. 
1796 */ 1797 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1798 str_notify_promisc_on_phys(dsp); 1799 else 1800 str_notify_promisc_off_phys(dsp); 1801 break; 1802 1803 case MAC_NOTE_PROMISC: 1804 break; 1805 1806 case MAC_NOTE_UNICST: 1807 /* 1808 * This notification is sent whenever the MAC unicast address 1809 * changes. We need to re-cache the address. 1810 */ 1811 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1812 1813 /* 1814 * Send the appropriate DL_NOTIFY_IND. 1815 */ 1816 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1817 break; 1818 1819 case MAC_NOTE_LINK: 1820 /* 1821 * This notification is sent every time the MAC driver 1822 * updates the link state. 1823 */ 1824 switch (mac_link_get(dsp->ds_mh)) { 1825 case LINK_STATE_UP: { 1826 uint64_t speed; 1827 /* 1828 * The link is up so send the appropriate 1829 * DL_NOTIFY_IND. 1830 */ 1831 str_notify_link_up(dsp); 1832 1833 speed = mac_stat_get(dsp->ds_mh, MAC_STAT_IFSPEED); 1834 str_notify_speed(dsp, (uint32_t)(speed / 1000ull)); 1835 break; 1836 } 1837 case LINK_STATE_DOWN: 1838 /* 1839 * The link is down so send the appropriate 1840 * DL_NOTIFY_IND. 1841 */ 1842 str_notify_link_down(dsp); 1843 break; 1844 1845 default: 1846 break; 1847 } 1848 break; 1849 1850 case MAC_NOTE_RESOURCE: 1851 case MAC_NOTE_VNIC: 1852 /* 1853 * This notification is sent whenever the MAC resources 1854 * change or capabilities change. We need to renegotiate 1855 * the capabilities. Send the appropriate DL_NOTIFY_IND. 1856 */ 1857 str_notify_capab_reneg(dsp); 1858 break; 1859 1860 case MAC_NOTE_FASTPATH_FLUSH: 1861 str_notify_fastpath_flush(dsp); 1862 break; 1863 1864 case MAC_NOTE_MARGIN: 1865 break; 1866 default: 1867 ASSERT(B_FALSE); 1868 break; 1869 } 1870 } 1871 1872 static inline uint_t 1873 mp_getsize(mblk_t *mp) 1874 { 1875 ASSERT(DB_TYPE(mp) == M_DATA); 1876 return ((mp->b_cont == NULL) ? MBLKL(mp) : msgdsize(mp)); 1877 } 1878 1879 /* 1880 * Calculate the dld queue depth, free the messages that exceed the threshold. 
1881 */ 1882 static void 1883 dld_tx_qdepth_timer(void *arg) 1884 { 1885 dld_str_t *dsp = (dld_str_t *)arg; 1886 mblk_t *prev, *mp; 1887 uint_t cnt, msgcnt, size; 1888 1889 mutex_enter(&dsp->ds_tx_list_lock); 1890 1891 /* Calculate total size and count of the packet(s) */ 1892 cnt = msgcnt = 0; 1893 for (prev = NULL, mp = dsp->ds_tx_list_head; mp != NULL; 1894 prev = mp, mp = mp->b_next) { 1895 size = mp_getsize(mp); 1896 cnt += size; 1897 msgcnt++; 1898 if (cnt >= dld_max_q_count || msgcnt >= dld_max_q_count) { 1899 ASSERT(dsp->ds_tx_qbusy); 1900 dsp->ds_tx_list_tail = prev; 1901 if (prev == NULL) 1902 dsp->ds_tx_list_head = NULL; 1903 else 1904 prev->b_next = NULL; 1905 freemsgchain(mp); 1906 cnt -= size; 1907 msgcnt--; 1908 break; 1909 } 1910 } 1911 dsp->ds_tx_cnt = cnt; 1912 dsp->ds_tx_msgcnt = msgcnt; 1913 dsp->ds_tx_qdepth_tid = 0; 1914 mutex_exit(&dsp->ds_tx_list_lock); 1915 } 1916 1917 /* 1918 * Enqueue one or more messages on the transmit queue. Caller specifies: 1919 * - the insertion position (head/tail). 1920 * - the message count and the total message size of messages to be queued 1921 * if they are known to the caller; or 0 if they are not known. 1922 * 1923 * If the caller does not know the message size information, this usually 1924 * means that dld_wsrv() managed to send some but not all of the queued 1925 * messages. For performance reasons, we do not calculate the queue depth 1926 * every time. Instead, a timer is started to calculate the queue depth 1927 * every 1 second (can be changed by tx_qdepth_interval). 
1928 */ 1929 static void 1930 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, mblk_t *tail, boolean_t head_insert, 1931 uint_t msgcnt, uint_t cnt) 1932 { 1933 queue_t *q = dsp->ds_wq; 1934 uint_t tot_cnt, tot_msgcnt; 1935 mblk_t *next; 1936 1937 mutex_enter(&dsp->ds_tx_list_lock); 1938 1939 /* 1940 * Simply enqueue the message and calculate the queue depth via 1941 * timer if: 1942 * 1943 * - the current queue depth is incorrect, and the timer is already 1944 * started; or 1945 * 1946 * - the given message size is unknown and it is allowed to start the 1947 * timer; 1948 */ 1949 if ((dsp->ds_tx_qdepth_tid != 0) || 1950 (msgcnt == 0 && tx_qdepth_interval != 0)) { 1951 goto enqueue; 1952 } 1953 1954 /* 1955 * The timer is not allowed, so calculate the message size now. 1956 */ 1957 if (msgcnt == 0) { 1958 for (next = mp; next != NULL; next = next->b_next) { 1959 cnt += mp_getsize(next); 1960 msgcnt++; 1961 } 1962 } 1963 1964 /* 1965 * Grow the queue depth using the input messesge size. 1966 * 1967 * If the queue depth would exceed the allowed threshold, drop 1968 * new packet(s) and drain those already in the queue. 1969 */ 1970 tot_cnt = dsp->ds_tx_cnt + cnt; 1971 tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; 1972 1973 if (!head_insert && (tot_cnt >= dld_max_q_count || 1974 tot_msgcnt >= dld_max_q_count)) { 1975 ASSERT(dsp->ds_tx_qbusy); 1976 mutex_exit(&dsp->ds_tx_list_lock); 1977 freemsgchain(mp); 1978 goto done; 1979 } 1980 /* Update the queue size parameters */ 1981 dsp->ds_tx_cnt = tot_cnt; 1982 dsp->ds_tx_msgcnt = tot_msgcnt; 1983 1984 enqueue: 1985 /* 1986 * If the transmit queue is currently empty and we are 1987 * about to deposit the packet(s) there, switch mode to 1988 * "busy" and raise flow-control condition. 
1989 */ 1990 if (!dsp->ds_tx_qbusy) { 1991 dsp->ds_tx_qbusy = B_TRUE; 1992 ASSERT(dsp->ds_tx_flow_mp != NULL); 1993 (void) putq(q, dsp->ds_tx_flow_mp); 1994 dsp->ds_tx_flow_mp = NULL; 1995 } 1996 1997 if (!head_insert) { 1998 /* Tail insertion */ 1999 if (dsp->ds_tx_list_head == NULL) 2000 dsp->ds_tx_list_head = mp; 2001 else 2002 dsp->ds_tx_list_tail->b_next = mp; 2003 dsp->ds_tx_list_tail = tail; 2004 } else { 2005 /* Head insertion */ 2006 tail->b_next = dsp->ds_tx_list_head; 2007 if (dsp->ds_tx_list_head == NULL) 2008 dsp->ds_tx_list_tail = tail; 2009 dsp->ds_tx_list_head = mp; 2010 } 2011 2012 if (msgcnt == 0 && dsp->ds_tx_qdepth_tid == 0 && 2013 tx_qdepth_interval != 0) { 2014 /* 2015 * The message size is not given so that we need to start 2016 * the timer to calculate the queue depth. 2017 */ 2018 dsp->ds_tx_qdepth_tid = timeout(dld_tx_qdepth_timer, dsp, 2019 drv_usectohz(tx_qdepth_interval)); 2020 ASSERT(dsp->ds_tx_qdepth_tid != NULL); 2021 } 2022 mutex_exit(&dsp->ds_tx_list_lock); 2023 done: 2024 /* Schedule service thread to drain the transmit queue */ 2025 if (!head_insert) 2026 qenable(q); 2027 } 2028 2029 void 2030 dld_tx_flush(dld_str_t *dsp) 2031 { 2032 timeout_id_t tid = 0; 2033 2034 mutex_enter(&dsp->ds_tx_list_lock); 2035 if (dsp->ds_tx_list_head != NULL) { 2036 freemsgchain(dsp->ds_tx_list_head); 2037 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 2038 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 2039 if (dsp->ds_tx_qbusy) { 2040 dsp->ds_tx_flow_mp = getq(dsp->ds_wq); 2041 ASSERT(dsp->ds_tx_flow_mp != NULL); 2042 dsp->ds_tx_qbusy = B_FALSE; 2043 } 2044 if ((tid = dsp->ds_tx_qdepth_tid) != 0) 2045 dsp->ds_tx_qdepth_tid = 0; 2046 } 2047 mutex_exit(&dsp->ds_tx_list_lock); 2048 2049 /* 2050 * Note that ds_tx_list_lock (which is acquired by the timeout 2051 * callback routine) cannot be held across the call to untimeout(). 2052 */ 2053 if (tid != 0) 2054 (void) untimeout(tid); 2055 } 2056 2057 /* 2058 * Process a non-data message. 
2059 */ 2060 static void 2061 dld_wput_nondata(dld_str_t *dsp, mblk_t *mp) 2062 { 2063 ASSERT((dsp->ds_type == DLD_DLPI && dsp->ds_ioctl == NULL) || 2064 (dsp->ds_type == DLD_CONTROL && dsp->ds_ioctl != NULL)); 2065 2066 mutex_enter(&dsp->ds_disp_lock); 2067 2068 /* 2069 * The processing of the message might block. Enqueue the 2070 * message for later processing. 2071 */ 2072 if (dsp->ds_pending_head == NULL) { 2073 dsp->ds_pending_head = dsp->ds_pending_tail = mp; 2074 } else { 2075 dsp->ds_pending_tail->b_next = mp; 2076 dsp->ds_pending_tail = mp; 2077 } 2078 2079 /* 2080 * If there is no task pending, kick off the task. 2081 */ 2082 if (dsp->ds_tid == NULL) { 2083 dsp->ds_tid = taskq_dispatch(dld_disp_taskq, 2084 dld_wput_nondata_task, dsp, TQ_SLEEP); 2085 ASSERT(dsp->ds_tid != NULL); 2086 } 2087 mutex_exit(&dsp->ds_disp_lock); 2088 } 2089 2090 /* 2091 * The worker thread which processes non-data messages. Note we only process 2092 * one message at one time in order to be able to "flush" the queued message 2093 * and serialize the processing. 
2094 */ 2095 static void 2096 dld_wput_nondata_task(void *arg) 2097 { 2098 dld_str_t *dsp = (dld_str_t *)arg; 2099 mblk_t *mp; 2100 2101 mutex_enter(&dsp->ds_disp_lock); 2102 ASSERT(dsp->ds_pending_head != NULL); 2103 ASSERT(dsp->ds_tid != NULL); 2104 2105 if (dsp->ds_closing) 2106 goto closing; 2107 2108 mp = dsp->ds_pending_head; 2109 if ((dsp->ds_pending_head = mp->b_next) == NULL) 2110 dsp->ds_pending_tail = NULL; 2111 mp->b_next = NULL; 2112 2113 mutex_exit(&dsp->ds_disp_lock); 2114 2115 switch (DB_TYPE(mp)) { 2116 case M_PROTO: 2117 case M_PCPROTO: 2118 ASSERT(dsp->ds_type == DLD_DLPI); 2119 dld_wput_proto_nondata(dsp, mp); 2120 break; 2121 case M_IOCTL: { 2122 uint_t cmd; 2123 2124 if (dsp->ds_type == DLD_CONTROL) { 2125 ASSERT(dsp->ds_ioctl != NULL); 2126 dsp->ds_ioctl(dsp->ds_wq, mp); 2127 break; 2128 } 2129 2130 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 2131 2132 switch (cmd) { 2133 case DLIOCNATIVE: 2134 ioc_native(dsp, mp); 2135 break; 2136 case DLIOCMARGININFO: 2137 ioc_margin(dsp, mp); 2138 break; 2139 case DLIOCRAW: 2140 ioc_raw(dsp, mp); 2141 break; 2142 case DLIOCHDRINFO: 2143 ioc_fast(dsp, mp); 2144 break; 2145 default: 2146 ioc(dsp, mp); 2147 break; 2148 } 2149 break; 2150 } 2151 case M_IOCDATA: 2152 ASSERT(dsp->ds_type == DLD_DLPI); 2153 ioc(dsp, mp); 2154 break; 2155 } 2156 2157 mutex_enter(&dsp->ds_disp_lock); 2158 2159 if (dsp->ds_closing) 2160 goto closing; 2161 2162 if (dsp->ds_pending_head != NULL) { 2163 dsp->ds_tid = taskq_dispatch(dld_disp_taskq, 2164 dld_wput_nondata_task, dsp, TQ_SLEEP); 2165 ASSERT(dsp->ds_tid != NULL); 2166 } else { 2167 dsp->ds_tid = NULL; 2168 } 2169 mutex_exit(&dsp->ds_disp_lock); 2170 return; 2171 2172 /* 2173 * If the stream is closing, flush all queued messages and inform 2174 * the stream once it is done. 
2175 */ 2176 closing: 2177 freemsgchain(dsp->ds_pending_head); 2178 dsp->ds_pending_head = dsp->ds_pending_tail = NULL; 2179 dsp->ds_tid = NULL; 2180 cv_signal(&dsp->ds_disp_cv); 2181 mutex_exit(&dsp->ds_disp_lock); 2182 } 2183 2184 /* 2185 * Flush queued non-data messages. 2186 */ 2187 static void 2188 dld_flush_nondata(dld_str_t *dsp) 2189 { 2190 mutex_enter(&dsp->ds_disp_lock); 2191 freemsgchain(dsp->ds_pending_head); 2192 dsp->ds_pending_head = dsp->ds_pending_tail = NULL; 2193 mutex_exit(&dsp->ds_disp_lock); 2194 } 2195 2196 /* 2197 * DLIOCNATIVE 2198 */ 2199 static void 2200 ioc_native(dld_str_t *dsp, mblk_t *mp) 2201 { 2202 queue_t *q = dsp->ds_wq; 2203 const mac_info_t *mip = dsp->ds_mip; 2204 2205 rw_enter(&dsp->ds_lock, RW_WRITER); 2206 2207 /* 2208 * Native mode can be enabled if it's disabled and if the 2209 * native media type is different. 2210 */ 2211 if (!dsp->ds_native && mip->mi_media != mip->mi_nativemedia) 2212 dsp->ds_native = B_TRUE; 2213 2214 rw_exit(&dsp->ds_lock); 2215 2216 if (dsp->ds_native) 2217 miocack(q, mp, 0, mip->mi_nativemedia); 2218 else 2219 miocnak(q, mp, 0, ENOTSUP); 2220 } 2221 2222 /* 2223 * DLIOCMARGININFO 2224 */ 2225 static void 2226 ioc_margin(dld_str_t *dsp, mblk_t *mp) 2227 { 2228 queue_t *q = dsp->ds_wq; 2229 uint32_t margin; 2230 int err; 2231 2232 if (dsp->ds_dlstate == DL_UNATTACHED) { 2233 err = EINVAL; 2234 goto failed; 2235 } 2236 if ((err = miocpullup(mp, sizeof (uint32_t))) != 0) 2237 goto failed; 2238 2239 mac_margin_get(dsp->ds_mh, &margin); 2240 *((uint32_t *)mp->b_cont->b_rptr) = margin; 2241 miocack(q, mp, sizeof (uint32_t), 0); 2242 return; 2243 2244 failed: 2245 miocnak(q, mp, 0, err); 2246 } 2247 2248 /* 2249 * DLIOCRAW 2250 */ 2251 static void 2252 ioc_raw(dld_str_t *dsp, mblk_t *mp) 2253 { 2254 queue_t *q = dsp->ds_wq; 2255 2256 if (dsp->ds_polling || dsp->ds_soft_ring) { 2257 miocnak(q, mp, 0, EPROTO); 2258 return; 2259 } 2260 2261 rw_enter(&dsp->ds_lock, RW_WRITER); 2262 if ((dsp->ds_mode != 
DLD_RAW) && (dsp->ds_dlstate == DL_IDLE)) { 2263 /* 2264 * Set the receive callback. 2265 */ 2266 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, dsp); 2267 dsp->ds_tx = str_mdata_raw_put; 2268 } 2269 dsp->ds_mode = DLD_RAW; 2270 rw_exit(&dsp->ds_lock); 2271 miocack(q, mp, 0, 0); 2272 } 2273 2274 /* 2275 * DLIOCHDRINFO 2276 */ 2277 static void 2278 ioc_fast(dld_str_t *dsp, mblk_t *mp) 2279 { 2280 dl_unitdata_req_t *dlp; 2281 off_t off; 2282 size_t len; 2283 const uint8_t *addr; 2284 uint16_t sap; 2285 mblk_t *nmp; 2286 mblk_t *hmp; 2287 uint_t addr_length; 2288 queue_t *q = dsp->ds_wq; 2289 int err; 2290 2291 if (dld_opt & DLD_OPT_NO_FASTPATH) { 2292 err = ENOTSUP; 2293 goto failed; 2294 } 2295 2296 /* 2297 * DLIOCHDRINFO should only come from IP. The one initiated from 2298 * user-land should not be allowed. 2299 */ 2300 if (((struct iocblk *)mp->b_rptr)->ioc_cr != kcred) { 2301 err = EINVAL; 2302 goto failed; 2303 } 2304 2305 nmp = mp->b_cont; 2306 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 2307 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 2308 dlp->dl_primitive != DL_UNITDATA_REQ)) { 2309 err = EINVAL; 2310 goto failed; 2311 } 2312 2313 off = dlp->dl_dest_addr_offset; 2314 len = dlp->dl_dest_addr_length; 2315 2316 if (!MBLKIN(nmp, off, len)) { 2317 err = EINVAL; 2318 goto failed; 2319 } 2320 2321 /* 2322 * We don't need to hold any locks to access ds_dlstate, because 2323 * control message prossessing (which updates this field) is 2324 * serialized. 
2325 */ 2326 if (dsp->ds_dlstate != DL_IDLE) { 2327 err = ENOTSUP; 2328 goto failed; 2329 } 2330 2331 addr_length = dsp->ds_mip->mi_addr_length; 2332 if (len != addr_length + sizeof (uint16_t)) { 2333 err = EINVAL; 2334 goto failed; 2335 } 2336 2337 addr = nmp->b_rptr + off; 2338 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 2339 2340 if ((hmp = dls_header(dsp->ds_dc, addr, sap, 0, NULL)) == NULL) { 2341 err = ENOMEM; 2342 goto failed; 2343 } 2344 2345 rw_enter(&dsp->ds_lock, RW_WRITER); 2346 ASSERT(dsp->ds_dlstate == DL_IDLE); 2347 if (dsp->ds_mode != DLD_FASTPATH) { 2348 /* 2349 * Set the receive callback (unless polling or 2350 * soft-ring is enabled). 2351 */ 2352 dsp->ds_mode = DLD_FASTPATH; 2353 if (!dsp->ds_polling && !dsp->ds_soft_ring) 2354 dls_rx_set(dsp->ds_dc, dld_str_rx_fastpath, dsp); 2355 dsp->ds_tx = str_mdata_fastpath_put; 2356 } 2357 rw_exit(&dsp->ds_lock); 2358 2359 freemsg(nmp->b_cont); 2360 nmp->b_cont = hmp; 2361 2362 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 2363 return; 2364 failed: 2365 miocnak(q, mp, 0, err); 2366 } 2367 2368 static void 2369 ioc(dld_str_t *dsp, mblk_t *mp) 2370 { 2371 queue_t *q = dsp->ds_wq; 2372 mac_handle_t mh; 2373 2374 if (dsp->ds_dlstate == DL_UNATTACHED) { 2375 miocnak(q, mp, 0, EINVAL); 2376 return; 2377 } 2378 mh = dsp->ds_mh; 2379 ASSERT(mh != NULL); 2380 mac_ioctl(mh, q, mp); 2381 } 2382