1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Data-Link Driver 31 */ 32 33 #include <sys/stropts.h> 34 #include <sys/strsun.h> 35 #include <sys/strsubr.h> 36 #include <sys/atomic.h> 37 #include <sys/mkdev.h> 38 #include <sys/vlan.h> 39 #include <sys/dld.h> 40 #include <sys/dld_impl.h> 41 #include <sys/dls_impl.h> 42 #include <inet/common.h> 43 44 static int str_constructor(void *, void *, int); 45 static void str_destructor(void *, void *); 46 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *); 47 static void str_notify_promisc_on_phys(dld_str_t *); 48 static void str_notify_promisc_off_phys(dld_str_t *); 49 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 50 static void str_notify_link_up(dld_str_t *); 51 static void str_notify_link_down(dld_str_t *); 52 static void str_notify_capab_reneg(dld_str_t *); 53 static void str_notify_speed(dld_str_t *, uint32_t); 54 static void str_notify(void *, mac_notify_type_t); 55 56 static void ioc_raw(dld_str_t *, mblk_t *); 57 
static void ioc_fast(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static minor_t dld_minor_hold(boolean_t); 61 static void dld_minor_rele(minor_t); 62 63 static uint32_t str_count; 64 static kmem_cache_t *str_cachep; 65 static vmem_t *minor_arenap; 66 static uint32_t minor_count; 67 68 #define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) 69 #define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) 70 71 /* 72 * Some notes on entry points, flow-control, queueing and locking: 73 * 74 * This driver exports the traditional STREAMS put entry point as well as 75 * the non-STREAMS fast-path transmit routine which is provided to IP via 76 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 77 * and data operations, while the fast-path routine deals only with M_DATA 78 * fast-path packets. Regardless of the entry point, all outbound packets 79 * will end up in str_mdata_fastpath_put(), where they will be delivered to 80 * the MAC driver. 81 * 82 * The transmit logic operates in two modes: a "not busy" mode where the 83 * packets will be delivered to the MAC for a send attempt, or "busy" mode 84 * where they will be enqueued in the internal queue because of flow-control. 85 * Flow-control happens when the MAC driver indicates the packets couldn't 86 * be transmitted due to lack of resources (e.g. running out of descriptors). 87 * In such case, the driver will place a dummy message on its write-side 88 * STREAMS queue so that the queue is marked as "full". Any subsequent 89 * packets arriving at the driver will be enqueued in the internal queue, 90 * which is drained in the context of the service thread that gets scheduled 91 * whenever the driver is in the "busy" mode. 
When all packets have been 92 * successfully delivered by MAC and the internal queue is empty, it will 93 * transition to the "not busy" mode by removing the dummy message from the 94 * write-side STREAMS queue; in effect this will trigger backenabling. 95 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due 96 * to the above reasons. 97 * 98 * The driver implements an internal transmit queue independent of STREAMS. 99 * This allows for flexibility and provides a fast enqueue/dequeue mechanism 100 * compared to the putq() and 101 * getq() STREAMS interfaces. The only putq() and getq() operations done by the driver are those related to placing and 102 * removing the dummy message to/from the write-side STREAMS queue for flow- 103 * control purposes. 104 * 105 * Locking is done independent of STREAMS due to the driver being fully MT. 106 * Threads entering the driver (either from put or service entry points) 107 * will most likely be readers, with the exception of a few writer cases 108 * such as those handling DLPI attach/detach/bind/unbind/etc. or any of the 109 * DLD-related ioctl requests. The DLPI detach case is special, because 110 * it involves freeing resources and therefore must be single-threaded. 111 * Unfortunately the readers/writers lock can't be used to protect against 112 * it, because the lock is dropped prior to the driver calling places where 113 * putnext() may be invoked, and such places may depend on those resources 114 * to exist. Because of this, the driver always completes the DLPI detach 115 * process when there are no other threads running in the driver. This is 116 * done by keeping track of the number of threads, such that the last 117 * thread leaving the driver will finish the pending DLPI detach operation. 
118 */ 119 120 /* 121 * dld_max_q_count is the queue depth threshold used to limit the number of 122 * outstanding packets or bytes allowed in the queue; once this limit is 123 * reached the driver will free any incoming ones until the queue depth 124 * drops below the threshold. 125 * 126 * This buffering is provided to accomodate clients which do not employ 127 * their own buffering scheme, and to handle occasional packet bursts. 128 * Clients which handle their own buffering will receive positive feedback 129 * from this driver as soon as it transitions into the "busy" state, i.e. 130 * when the queue is initially filled up; they will get backenabled once 131 * the queue is empty. 132 * 133 * The value chosen here is rather arbitrary; in future some intelligent 134 * heuristics may be involved which could take into account the hardware's 135 * transmit ring size, etc. 136 */ 137 uint_t dld_max_q_count = (16 * 1024 *1024); 138 139 static dev_info_t * 140 dld_finddevinfo(dev_t dev) 141 { 142 minor_t minor = getminor(dev); 143 char *drvname = ddi_major_to_name(getmajor(dev)); 144 char name[MAXNAMELEN]; 145 dls_vlan_t *dvp = NULL; 146 dev_info_t *dip = NULL; 147 148 if (drvname == NULL || minor == 0 || minor > DLD_MAX_PPA + 1) 149 return (NULL); 150 151 (void) snprintf(name, MAXNAMELEN, "%s%d", drvname, (int)minor - 1); 152 if (dls_vlan_hold(name, &dvp, B_FALSE) != 0) 153 return (NULL); 154 155 dip = mac_devinfo_get(dvp->dv_dlp->dl_mh); 156 dls_vlan_rele(dvp); 157 return (dip); 158 } 159 160 /* 161 * devo_getinfo: getinfo(9e) 162 */ 163 /*ARGSUSED*/ 164 int 165 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 166 { 167 dev_info_t *devinfo; 168 minor_t minor = getminor((dev_t)arg); 169 int rc = DDI_FAILURE; 170 171 switch (cmd) { 172 case DDI_INFO_DEVT2DEVINFO: 173 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 174 *(dev_info_t **)resp = devinfo; 175 rc = DDI_SUCCESS; 176 } 177 break; 178 case DDI_INFO_DEVT2INSTANCE: 179 if (minor > 0 
&& minor <= DLD_MAX_PPA + 1) { 180 *(int *)resp = (int)minor - 1; 181 rc = DDI_SUCCESS; 182 } 183 break; 184 } 185 return (rc); 186 } 187 188 /* 189 * qi_qopen: open(9e) 190 */ 191 /*ARGSUSED*/ 192 int 193 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 194 { 195 dld_str_t *dsp; 196 major_t major; 197 minor_t minor; 198 int err; 199 200 if (sflag == MODOPEN) 201 return (ENOTSUP); 202 203 /* 204 * This is a cloning driver and therefore each queue should only 205 * ever get opened once. 206 */ 207 if (rq->q_ptr != NULL) 208 return (EBUSY); 209 210 major = getmajor(*devp); 211 minor = getminor(*devp); 212 if (minor > DLD_MAX_MINOR) 213 return (ENODEV); 214 215 /* 216 * Create a new dld_str_t for the stream. This will grab a new minor 217 * number that will be handed back in the cloned dev_t. Creation may 218 * fail if we can't allocate the dummy mblk used for flow-control. 219 */ 220 dsp = dld_str_create(rq, DLD_DLPI, major, 221 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 222 if (dsp == NULL) 223 return (ENOSR); 224 225 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 226 if (minor != 0) { 227 /* 228 * Style 1 open 229 */ 230 231 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 232 goto failed; 233 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 234 } 235 236 /* 237 * Enable the queue srv(9e) routine. 238 */ 239 qprocson(rq); 240 241 /* 242 * Construct a cloned dev_t to hand back. 243 */ 244 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 245 return (0); 246 247 failed: 248 dld_str_destroy(dsp); 249 return (err); 250 } 251 252 /* 253 * qi_qclose: close(9e) 254 */ 255 int 256 dld_close(queue_t *rq) 257 { 258 dld_str_t *dsp = rq->q_ptr; 259 260 /* 261 * Disable the queue srv(9e) routine. 262 */ 263 qprocsoff(rq); 264 265 /* 266 * At this point we can not be entered by any threads via STREAMS 267 * or the direct call interface, which is available only to IP. 
268 * After the interface is unplumbed, IP wouldn't have any reference 269 * to this instance, and therefore we are now effectively single 270 * threaded and don't require any lock protection. Flush all 271 * pending packets which are sitting in the transmit queue. 272 */ 273 ASSERT(dsp->ds_thr == 0); 274 dld_tx_flush(dsp); 275 276 /* 277 * This stream was open to a provider node. Check to see 278 * if it has been cleanly shut down. 279 */ 280 if (dsp->ds_dlstate != DL_UNATTACHED) { 281 /* 282 * The stream is either open to a style 1 provider or 283 * this is not clean shutdown. Detach from the PPA. 284 * (This is still ok even in the style 1 case). 285 */ 286 dld_str_detach(dsp); 287 } 288 289 dld_str_destroy(dsp); 290 return (0); 291 } 292 293 /* 294 * qi_qputp: put(9e) 295 */ 296 void 297 dld_wput(queue_t *wq, mblk_t *mp) 298 { 299 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 300 301 DLD_ENTER(dsp); 302 303 switch (DB_TYPE(mp)) { 304 case M_DATA: 305 rw_enter(&dsp->ds_lock, RW_READER); 306 if (dsp->ds_dlstate != DL_IDLE || 307 dsp->ds_mode == DLD_UNITDATA) { 308 freemsg(mp); 309 } else if (dsp->ds_mode == DLD_FASTPATH) { 310 str_mdata_fastpath_put(dsp, mp); 311 } else if (dsp->ds_mode == DLD_RAW) { 312 str_mdata_raw_put(dsp, mp); 313 } 314 rw_exit(&dsp->ds_lock); 315 break; 316 case M_PROTO: 317 case M_PCPROTO: 318 dld_proto(dsp, mp); 319 break; 320 case M_IOCTL: 321 dld_ioc(dsp, mp); 322 break; 323 case M_FLUSH: 324 if (*mp->b_rptr & FLUSHW) { 325 dld_tx_flush(dsp); 326 *mp->b_rptr &= ~FLUSHW; 327 } 328 329 if (*mp->b_rptr & FLUSHR) { 330 qreply(wq, mp); 331 } else { 332 freemsg(mp); 333 } 334 break; 335 default: 336 freemsg(mp); 337 break; 338 } 339 340 DLD_EXIT(dsp); 341 } 342 343 /* 344 * qi_srvp: srv(9e) 345 */ 346 void 347 dld_wsrv(queue_t *wq) 348 { 349 mblk_t *mp; 350 dld_str_t *dsp = wq->q_ptr; 351 352 DLD_ENTER(dsp); 353 rw_enter(&dsp->ds_lock, RW_READER); 354 /* 355 * Grab all packets (chained via b_next) off our transmit queue 356 * and try to send them 
all to the MAC layer. Since the queue 357 * is independent of streams, we are able to dequeue all messages 358 * at once without looping through getq() and manually chaining 359 * them. Note that the queue size parameters (byte and message 360 * counts) are cleared as well, but we postpone the backenabling 361 * until after the MAC transmit since some packets may end up 362 * back at our transmit queue. 363 */ 364 mutex_enter(&dsp->ds_tx_list_lock); 365 if ((mp = dsp->ds_tx_list_head) == NULL) { 366 ASSERT(!dsp->ds_tx_qbusy); 367 ASSERT(dsp->ds_tx_flow_mp != NULL); 368 ASSERT(dsp->ds_tx_list_head == NULL); 369 ASSERT(dsp->ds_tx_list_tail == NULL); 370 ASSERT(dsp->ds_tx_cnt == 0); 371 ASSERT(dsp->ds_tx_msgcnt == 0); 372 mutex_exit(&dsp->ds_tx_list_lock); 373 goto done; 374 } 375 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 376 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 377 mutex_exit(&dsp->ds_tx_list_lock); 378 379 /* 380 * Discard packets unless we are attached and bound; note that 381 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 382 * because regardless of the mode all transmit will end up in 383 * str_mdata_fastpath_put() where the packets may be queued. 384 */ 385 ASSERT(DB_TYPE(mp) == M_DATA); 386 if (dsp->ds_dlstate != DL_IDLE) { 387 freemsgchain(mp); 388 goto done; 389 } 390 391 /* 392 * Attempt to transmit one or more packets. If the MAC can't 393 * send them all, re-queue the packet(s) at the beginning of 394 * the transmit queue to avoid any re-ordering. 395 */ 396 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 397 dld_tx_enqueue(dsp, mp, B_TRUE); 398 399 /* 400 * Grab the list lock again and check if the transmit queue is 401 * really empty; if so, lift up flow-control and backenable any 402 * writer queues. If the queue is not empty, schedule service 403 * thread to drain it. 
404 */ 405 mutex_enter(&dsp->ds_tx_list_lock); 406 if (dsp->ds_tx_list_head == NULL) { 407 dsp->ds_tx_flow_mp = getq(wq); 408 ASSERT(dsp->ds_tx_flow_mp != NULL); 409 dsp->ds_tx_qbusy = B_FALSE; 410 } 411 mutex_exit(&dsp->ds_tx_list_lock); 412 done: 413 rw_exit(&dsp->ds_lock); 414 DLD_EXIT(dsp); 415 } 416 417 void 418 dld_init_ops(struct dev_ops *ops, const char *name) 419 { 420 struct streamtab *stream; 421 struct qinit *rq, *wq; 422 struct module_info *modinfo; 423 424 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 425 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 426 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 427 modinfo->mi_minpsz = 0; 428 modinfo->mi_maxpsz = 64*1024; 429 modinfo->mi_hiwat = 1; 430 modinfo->mi_lowat = 0; 431 432 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 433 rq->qi_qopen = dld_open; 434 rq->qi_qclose = dld_close; 435 rq->qi_minfo = modinfo; 436 437 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 438 wq->qi_putp = (pfi_t)dld_wput; 439 wq->qi_srvp = (pfi_t)dld_wsrv; 440 wq->qi_minfo = modinfo; 441 442 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 443 stream->st_rdinit = rq; 444 stream->st_wrinit = wq; 445 ops->devo_cb_ops->cb_str = stream; 446 447 ops->devo_getinfo = &dld_getinfo; 448 } 449 450 void 451 dld_fini_ops(struct dev_ops *ops) 452 { 453 struct streamtab *stream; 454 struct qinit *rq, *wq; 455 struct module_info *modinfo; 456 457 stream = ops->devo_cb_ops->cb_str; 458 rq = stream->st_rdinit; 459 wq = stream->st_wrinit; 460 modinfo = rq->qi_minfo; 461 ASSERT(wq->qi_minfo == modinfo); 462 463 kmem_free(stream, sizeof (struct streamtab)); 464 kmem_free(wq, sizeof (struct qinit)); 465 kmem_free(rq, sizeof (struct qinit)); 466 kmem_free(modinfo->mi_idname, FMNAMESZ); 467 kmem_free(modinfo, sizeof (struct module_info)); 468 } 469 470 /* 471 * Initialize this module's data structures. 472 */ 473 void 474 dld_str_init(void) 475 { 476 /* 477 * Create dld_str_t object cache. 
478 */ 479 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 480 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 481 ASSERT(str_cachep != NULL); 482 483 /* 484 * Allocate a vmem arena to manage minor numbers. The range of the 485 * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal 486 * minor number). 487 */ 488 minor_arenap = vmem_create("dld_minor_arena", 489 MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, 490 VM_SLEEP | VMC_IDENTIFIER); 491 ASSERT(minor_arenap != NULL); 492 } 493 494 /* 495 * Tear down this module's data structures. 496 */ 497 int 498 dld_str_fini(void) 499 { 500 /* 501 * Make sure that there are no objects in use. 502 */ 503 if (str_count != 0) 504 return (EBUSY); 505 506 /* 507 * Check to see if there are any minor numbers still in use. 508 */ 509 if (minor_count != 0) 510 return (EBUSY); 511 512 /* 513 * Destroy object cache. 514 */ 515 kmem_cache_destroy(str_cachep); 516 vmem_destroy(minor_arenap); 517 return (0); 518 } 519 520 /* 521 * Create a new dld_str_t object. 522 */ 523 dld_str_t * 524 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 525 { 526 dld_str_t *dsp; 527 528 /* 529 * Allocate an object from the cache. 530 */ 531 atomic_add_32(&str_count, 1); 532 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 533 534 /* 535 * Allocate the dummy mblk for flow-control. 536 */ 537 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 538 if (dsp->ds_tx_flow_mp == NULL) { 539 kmem_cache_free(str_cachep, dsp); 540 atomic_add_32(&str_count, -1); 541 return (NULL); 542 } 543 dsp->ds_type = type; 544 dsp->ds_major = major; 545 dsp->ds_style = style; 546 547 /* 548 * Initialize the queue pointers. 549 */ 550 ASSERT(RD(rq) == rq); 551 dsp->ds_rq = rq; 552 dsp->ds_wq = WR(rq); 553 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 554 555 /* 556 * We want explicit control over our write-side STREAMS queue 557 * where the dummy mblk gets added/removed for flow-control. 
558 */ 559 noenable(WR(rq)); 560 561 return (dsp); 562 } 563 564 /* 565 * Destroy a dld_str_t object. 566 */ 567 void 568 dld_str_destroy(dld_str_t *dsp) 569 { 570 queue_t *rq; 571 queue_t *wq; 572 573 /* 574 * Clear the queue pointers. 575 */ 576 rq = dsp->ds_rq; 577 wq = dsp->ds_wq; 578 ASSERT(wq == WR(rq)); 579 580 rq->q_ptr = wq->q_ptr = NULL; 581 dsp->ds_rq = dsp->ds_wq = NULL; 582 583 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 584 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 585 ASSERT(dsp->ds_tx_list_head == NULL); 586 ASSERT(dsp->ds_tx_list_tail == NULL); 587 ASSERT(dsp->ds_tx_cnt == 0); 588 ASSERT(dsp->ds_tx_msgcnt == 0); 589 ASSERT(!dsp->ds_tx_qbusy); 590 591 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 592 ASSERT(dsp->ds_thr == 0); 593 ASSERT(dsp->ds_detach_req == NULL); 594 595 /* 596 * Reinitialize all the flags. 597 */ 598 dsp->ds_notifications = 0; 599 dsp->ds_passivestate = DLD_UNINITIALIZED; 600 dsp->ds_mode = DLD_UNITDATA; 601 602 /* 603 * Free the dummy mblk if exists. 604 */ 605 if (dsp->ds_tx_flow_mp != NULL) { 606 freeb(dsp->ds_tx_flow_mp); 607 dsp->ds_tx_flow_mp = NULL; 608 } 609 /* 610 * Free the object back to the cache. 611 */ 612 kmem_cache_free(str_cachep, dsp); 613 atomic_add_32(&str_count, -1); 614 } 615 616 /* 617 * kmem_cache contructor function: see kmem_cache_create(9f). 618 */ 619 /*ARGSUSED*/ 620 static int 621 str_constructor(void *buf, void *cdrarg, int kmflags) 622 { 623 dld_str_t *dsp = buf; 624 625 bzero(buf, sizeof (dld_str_t)); 626 627 /* 628 * Allocate a new minor number. 629 */ 630 if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0) 631 return (-1); 632 633 /* 634 * Initialize the DLPI state machine. 635 */ 636 dsp->ds_dlstate = DL_UNATTACHED; 637 638 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 639 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 640 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 641 642 return (0); 643 } 644 645 /* 646 * kmem_cache destructor function. 
647 */ 648 /*ARGSUSED*/ 649 static void 650 str_destructor(void *buf, void *cdrarg) 651 { 652 dld_str_t *dsp = buf; 653 654 /* 655 * Make sure the DLPI state machine was reset. 656 */ 657 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 658 659 /* 660 * Make sure the data-link interface was closed. 661 */ 662 ASSERT(dsp->ds_mh == NULL); 663 ASSERT(dsp->ds_dc == NULL); 664 665 /* 666 * Make sure enabled notifications are cleared. 667 */ 668 ASSERT(dsp->ds_notifications == 0); 669 670 /* 671 * Make sure polling is disabled. 672 */ 673 ASSERT(!dsp->ds_polling); 674 675 /* 676 * Release the minor number. 677 */ 678 dld_minor_rele(dsp->ds_minor); 679 680 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 681 rw_destroy(&dsp->ds_lock); 682 683 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 684 mutex_destroy(&dsp->ds_tx_list_lock); 685 ASSERT(dsp->ds_tx_flow_mp == NULL); 686 687 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 688 mutex_destroy(&dsp->ds_thr_lock); 689 ASSERT(dsp->ds_detach_req == NULL); 690 } 691 692 /* 693 * M_DATA put (IP fast-path mode) 694 */ 695 void 696 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 697 { 698 /* 699 * We get here either as a result of putnext() from above or 700 * because IP has called us directly. If we are in the busy 701 * mode enqueue the packet(s) and return. Otherwise hand them 702 * over to the MAC driver for transmission; any remaining one(s) 703 * which didn't get sent will be queued. 704 * 705 * Note here that we don't grab the list lock prior to checking 706 * the busy flag. This is okay, because a missed transition 707 * will not cause any packet reordering for any particular TCP 708 * connection (which is single-threaded). The enqueue routine 709 * will atomically set the busy flag and schedule the service 710 * thread to run; the flag is only cleared by the service thread 711 * when there is no more packet to be transmitted. 
712 */ 713 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 714 dld_tx_enqueue(dsp, mp, B_FALSE); 715 } 716 717 /* 718 * M_DATA put (raw mode) 719 */ 720 void 721 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 722 { 723 struct ether_header *ehp; 724 mblk_t *bp; 725 size_t size; 726 size_t hdrlen; 727 728 size = MBLKL(mp); 729 if (size < sizeof (struct ether_header)) 730 goto discard; 731 732 hdrlen = sizeof (struct ether_header); 733 734 ehp = (struct ether_header *)mp->b_rptr; 735 if (ntohs(ehp->ether_type) == VLAN_TPID) { 736 struct ether_vlan_header *evhp; 737 738 if (size < sizeof (struct ether_vlan_header)) 739 goto discard; 740 741 /* 742 * Replace vtag with our own 743 */ 744 evhp = (struct ether_vlan_header *)ehp; 745 evhp->ether_tci = htons(VLAN_TCI(dsp->ds_pri, 746 ETHER_CFI, dsp->ds_vid)); 747 hdrlen = sizeof (struct ether_vlan_header); 748 } 749 750 /* 751 * Check the packet is not too big and that any remaining 752 * fragment list is composed entirely of M_DATA messages. (We 753 * know the first fragment was M_DATA otherwise we could not 754 * have got here). 755 */ 756 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 757 if (DB_TYPE(bp) != M_DATA) 758 goto discard; 759 size += MBLKL(bp); 760 } 761 762 if (size > dsp->ds_mip->mi_sdu_max + hdrlen) 763 goto discard; 764 765 str_mdata_fastpath_put(dsp, mp); 766 return; 767 768 discard: 769 freemsg(mp); 770 } 771 772 /* 773 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 
774 */ 775 int 776 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 777 { 778 int err; 779 const char *drvname; 780 char name[MAXNAMELEN]; 781 dls_channel_t dc; 782 uint_t addr_length; 783 784 ASSERT(dsp->ds_dc == NULL); 785 786 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 787 return (EINVAL); 788 789 (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); 790 791 if (strcmp(drvname, "aggr") != 0 && 792 qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 793 return (EINVAL); 794 795 /* 796 * Open a channel. 797 */ 798 if ((err = dls_open(name, &dc)) != 0) { 799 (void) qassociate(dsp->ds_wq, -1); 800 return (err); 801 } 802 803 /* 804 * Cache the MAC interface handle, a pointer to the immutable MAC 805 * information and the current and 'factory' MAC address. 806 */ 807 dsp->ds_mh = dls_mac(dc); 808 dsp->ds_mip = mac_info(dsp->ds_mh); 809 810 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 811 812 addr_length = dsp->ds_mip->mi_addr_length; 813 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 814 815 /* 816 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 817 * a non-VLAN interface). 818 */ 819 dsp->ds_vid = dls_vid(dc); 820 821 /* 822 * Set the default packet priority. 823 */ 824 dsp->ds_pri = 0; 825 826 /* 827 * Add a notify function so that the we get updates from the MAC. 828 */ 829 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 830 831 dsp->ds_dc = dc; 832 dsp->ds_dlstate = DL_UNBOUND; 833 834 return (0); 835 } 836 837 /* 838 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 839 * from close(2) for style 2. 840 */ 841 void 842 dld_str_detach(dld_str_t *dsp) 843 { 844 ASSERT(dsp->ds_thr == 0); 845 846 /* 847 * Remove the notify function. 848 */ 849 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 850 851 /* 852 * Re-initialize the DLPI state machine. 853 */ 854 dsp->ds_dlstate = DL_UNATTACHED; 855 856 /* 857 * Clear the polling and promisc flags. 
858 */ 859 dsp->ds_polling = B_FALSE; 860 dsp->ds_promisc = 0; 861 862 /* 863 * Close the channel. 864 */ 865 dls_close(dsp->ds_dc); 866 dsp->ds_dc = NULL; 867 dsp->ds_mh = NULL; 868 869 (void) qassociate(dsp->ds_wq, -1); 870 } 871 872 /* 873 * Raw mode receive function. 874 */ 875 /*ARGSUSED*/ 876 void 877 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 878 size_t header_length) 879 { 880 dld_str_t *dsp = (dld_str_t *)arg; 881 mblk_t *next; 882 883 ASSERT(mp != NULL); 884 do { 885 /* 886 * Get the pointer to the next packet in the chain and then 887 * clear b_next before the packet gets passed on. 888 */ 889 next = mp->b_next; 890 mp->b_next = NULL; 891 892 /* 893 * Wind back b_rptr to point at the MAC header. 894 */ 895 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 896 mp->b_rptr -= header_length; 897 if (header_length == sizeof (struct ether_vlan_header)) { 898 /* 899 * Strip off the vtag 900 */ 901 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 902 2 * ETHERADDRL); 903 mp->b_rptr += VLAN_TAGSZ; 904 } 905 906 /* 907 * Pass the packet on. 908 */ 909 putnext(dsp->ds_rq, mp); 910 911 /* 912 * Move on to the next packet in the chain. 913 */ 914 mp = next; 915 } while (mp != NULL); 916 } 917 918 /* 919 * Fast-path receive function. 920 */ 921 /*ARGSUSED*/ 922 void 923 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 924 size_t header_length) 925 { 926 dld_str_t *dsp = (dld_str_t *)arg; 927 mblk_t *next; 928 929 ASSERT(mp != NULL); 930 do { 931 /* 932 * Get the pointer to the next packet in the chain and then 933 * clear b_next before the packet gets passed on. 934 */ 935 next = mp->b_next; 936 mp->b_next = NULL; 937 938 /* 939 * Pass the packet on. 940 */ 941 putnext(dsp->ds_rq, mp); 942 943 /* 944 * Move on to the next packet in the chain. 945 */ 946 mp = next; 947 } while (mp != NULL); 948 } 949 950 /* 951 * Default receive function (send DL_UNITDATA_IND messages). 
952 */ 953 /*ARGSUSED*/ 954 void 955 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 956 size_t header_length) 957 { 958 dld_str_t *dsp = (dld_str_t *)arg; 959 mblk_t *ud_mp; 960 mblk_t *next; 961 962 ASSERT(mp != NULL); 963 do { 964 /* 965 * Get the pointer to the next packet in the chain and then 966 * clear b_next before the packet gets passed on. 967 */ 968 next = mp->b_next; 969 mp->b_next = NULL; 970 971 /* 972 * Wind back b_rptr to point at the MAC header. 973 */ 974 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 975 mp->b_rptr -= header_length; 976 977 /* 978 * Create the DL_UNITDATA_IND M_PROTO. 979 */ 980 if ((ud_mp = str_unitdata_ind(dsp, mp)) == NULL) { 981 freemsgchain(mp); 982 return; 983 } 984 985 /* 986 * Advance b_rptr to point at the payload again. 987 */ 988 mp->b_rptr += header_length; 989 990 /* 991 * Prepend the DL_UNITDATA_IND. 992 */ 993 ud_mp->b_cont = mp; 994 995 /* 996 * Send the message. 997 */ 998 putnext(dsp->ds_rq, ud_mp); 999 1000 /* 1001 * Move on to the next packet in the chain. 1002 */ 1003 mp = next; 1004 } while (mp != NULL); 1005 } 1006 1007 /* 1008 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1009 * current state of the interface. 1010 */ 1011 void 1012 dld_str_notify_ind(dld_str_t *dsp) 1013 { 1014 mac_notify_type_t type; 1015 1016 for (type = 0; type < MAC_NNOTE; type++) 1017 str_notify(dsp, type); 1018 } 1019 1020 typedef struct dl_unitdata_ind_wrapper { 1021 dl_unitdata_ind_t dl_unitdata; 1022 uint8_t dl_dest_addr[MAXADDRLEN + sizeof (uint16_t)]; 1023 uint8_t dl_src_addr[MAXADDRLEN + sizeof (uint16_t)]; 1024 } dl_unitdata_ind_wrapper_t; 1025 1026 /* 1027 * Create a DL_UNITDATA_IND M_PROTO message. 
1028 */ 1029 static mblk_t * 1030 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp) 1031 { 1032 mblk_t *nmp; 1033 dl_unitdata_ind_wrapper_t *dlwp; 1034 dl_unitdata_ind_t *dlp; 1035 dls_header_info_t dhi; 1036 uint_t addr_length; 1037 uint8_t *daddr; 1038 uint8_t *saddr; 1039 1040 /* 1041 * Get the packet header information. 1042 */ 1043 dls_header_info(dsp->ds_dc, mp, &dhi); 1044 1045 /* 1046 * Allocate a message large enough to contain the wrapper structure 1047 * defined above. 1048 */ 1049 if ((nmp = mexchange(dsp->ds_wq, NULL, 1050 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1051 DL_UNITDATA_IND)) == NULL) 1052 return (NULL); 1053 1054 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1055 1056 dlp = &(dlwp->dl_unitdata); 1057 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1058 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1059 1060 /* 1061 * Copy in the destination address. 1062 */ 1063 addr_length = dsp->ds_mip->mi_addr_length; 1064 daddr = dlwp->dl_dest_addr; 1065 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1066 bcopy(dhi.dhi_daddr, daddr, addr_length); 1067 1068 /* 1069 * Set the destination DLSAP to our bound DLSAP value. 1070 */ 1071 *(uint16_t *)(daddr + addr_length) = dsp->ds_sap; 1072 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1073 1074 /* 1075 * If the destination address was a group address then 1076 * dl_group_address field should be non-zero. 1077 */ 1078 dlp->dl_group_address = dhi.dhi_isgroup; 1079 1080 /* 1081 * Copy in the source address. 1082 */ 1083 saddr = dlwp->dl_src_addr; 1084 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1085 bcopy(dhi.dhi_saddr, saddr, addr_length); 1086 1087 /* 1088 * Set the source DLSAP to the packet ethertype. 
1089 */ 1090 *(uint16_t *)(saddr + addr_length) = dhi.dhi_ethertype; 1091 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1092 1093 return (nmp); 1094 } 1095 1096 /* 1097 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1098 */ 1099 static void 1100 str_notify_promisc_on_phys(dld_str_t *dsp) 1101 { 1102 mblk_t *mp; 1103 dl_notify_ind_t *dlip; 1104 1105 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1106 return; 1107 1108 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1109 M_PROTO, 0)) == NULL) 1110 return; 1111 1112 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1113 dlip = (dl_notify_ind_t *)mp->b_rptr; 1114 dlip->dl_primitive = DL_NOTIFY_IND; 1115 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1116 1117 qreply(dsp->ds_wq, mp); 1118 } 1119 1120 /* 1121 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1122 */ 1123 static void 1124 str_notify_promisc_off_phys(dld_str_t *dsp) 1125 { 1126 mblk_t *mp; 1127 dl_notify_ind_t *dlip; 1128 1129 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1130 return; 1131 1132 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1133 M_PROTO, 0)) == NULL) 1134 return; 1135 1136 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1137 dlip = (dl_notify_ind_t *)mp->b_rptr; 1138 dlip->dl_primitive = DL_NOTIFY_IND; 1139 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1140 1141 qreply(dsp->ds_wq, mp); 1142 } 1143 1144 /* 1145 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1146 */ 1147 static void 1148 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1149 { 1150 mblk_t *mp; 1151 dl_notify_ind_t *dlip; 1152 uint_t addr_length; 1153 uint16_t ethertype; 1154 1155 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1156 return; 1157 1158 addr_length = dsp->ds_mip->mi_addr_length; 1159 if ((mp = mexchange(dsp->ds_wq, NULL, 1160 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1161 M_PROTO, 0)) == NULL) 1162 return; 1163 1164 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1165 dlip = (dl_notify_ind_t 
*)mp->b_rptr; 1166 dlip->dl_primitive = DL_NOTIFY_IND; 1167 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1168 dlip->dl_data = DL_CURR_PHYS_ADDR; 1169 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1170 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1171 1172 bcopy(addr, &dlip[1], addr_length); 1173 1174 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1175 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = 1176 ethertype; 1177 1178 qreply(dsp->ds_wq, mp); 1179 } 1180 1181 /* 1182 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1183 */ 1184 static void 1185 str_notify_link_up(dld_str_t *dsp) 1186 { 1187 mblk_t *mp; 1188 dl_notify_ind_t *dlip; 1189 1190 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1191 return; 1192 1193 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1194 M_PROTO, 0)) == NULL) 1195 return; 1196 1197 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1198 dlip = (dl_notify_ind_t *)mp->b_rptr; 1199 dlip->dl_primitive = DL_NOTIFY_IND; 1200 dlip->dl_notification = DL_NOTE_LINK_UP; 1201 1202 qreply(dsp->ds_wq, mp); 1203 } 1204 1205 /* 1206 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1207 */ 1208 static void 1209 str_notify_link_down(dld_str_t *dsp) 1210 { 1211 mblk_t *mp; 1212 dl_notify_ind_t *dlip; 1213 1214 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1215 return; 1216 1217 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1218 M_PROTO, 0)) == NULL) 1219 return; 1220 1221 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1222 dlip = (dl_notify_ind_t *)mp->b_rptr; 1223 dlip->dl_primitive = DL_NOTIFY_IND; 1224 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1225 1226 qreply(dsp->ds_wq, mp); 1227 } 1228 1229 /* 1230 * DL_NOTIFY_IND: DL_NOTE_SPEED 1231 */ 1232 static void 1233 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1234 { 1235 mblk_t *mp; 1236 dl_notify_ind_t *dlip; 1237 1238 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1239 return; 1240 1241 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1242 M_PROTO, 
0)) == NULL) 1243 return; 1244 1245 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1246 dlip = (dl_notify_ind_t *)mp->b_rptr; 1247 dlip->dl_primitive = DL_NOTIFY_IND; 1248 dlip->dl_notification = DL_NOTE_SPEED; 1249 dlip->dl_data = speed; 1250 1251 qreply(dsp->ds_wq, mp); 1252 } 1253 1254 /* 1255 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1256 */ 1257 static void 1258 str_notify_capab_reneg(dld_str_t *dsp) 1259 { 1260 mblk_t *mp; 1261 dl_notify_ind_t *dlip; 1262 1263 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1264 return; 1265 1266 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1267 M_PROTO, 0)) == NULL) 1268 return; 1269 1270 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1271 dlip = (dl_notify_ind_t *)mp->b_rptr; 1272 dlip->dl_primitive = DL_NOTIFY_IND; 1273 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1274 1275 qreply(dsp->ds_wq, mp); 1276 } 1277 1278 /* 1279 * MAC notification callback. 1280 */ 1281 static void 1282 str_notify(void *arg, mac_notify_type_t type) 1283 { 1284 dld_str_t *dsp = (dld_str_t *)arg; 1285 queue_t *q = dsp->ds_wq; 1286 1287 switch (type) { 1288 case MAC_NOTE_TX: 1289 qenable(q); 1290 break; 1291 1292 case MAC_NOTE_DEVPROMISC: 1293 /* 1294 * Send the appropriate DL_NOTIFY_IND. 1295 */ 1296 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1297 str_notify_promisc_on_phys(dsp); 1298 else 1299 str_notify_promisc_off_phys(dsp); 1300 break; 1301 1302 case MAC_NOTE_PROMISC: 1303 break; 1304 1305 case MAC_NOTE_UNICST: 1306 /* 1307 * This notification is sent whenever the MAC unicast address 1308 * changes. We need to re-cache the address. 1309 */ 1310 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1311 1312 /* 1313 * Send the appropriate DL_NOTIFY_IND. 1314 */ 1315 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1316 break; 1317 1318 case MAC_NOTE_LINK: 1319 /* 1320 * This notification is sent every time the MAC driver 1321 * updates the link state. 
1322 */ 1323 switch (mac_link_get(dsp->ds_mh)) { 1324 case LINK_STATE_UP: 1325 /* 1326 * The link is up so send the appropriate 1327 * DL_NOTIFY_IND. 1328 */ 1329 str_notify_link_up(dsp); 1330 1331 /* 1332 * If we can find the link speed then send a 1333 * DL_NOTIFY_IND for that too. 1334 */ 1335 if (dsp->ds_mip->mi_stat[MAC_STAT_IFSPEED]) { 1336 uint64_t val; 1337 1338 val = mac_stat_get(dsp->ds_mh, 1339 MAC_STAT_IFSPEED); 1340 str_notify_speed(dsp, 1341 (uint32_t)(val / 1000ull)); 1342 } 1343 break; 1344 1345 case LINK_STATE_DOWN: 1346 /* 1347 * The link is down so send the appropriate 1348 * DL_NOTIFY_IND. 1349 */ 1350 str_notify_link_down(dsp); 1351 break; 1352 1353 default: 1354 break; 1355 } 1356 break; 1357 1358 case MAC_NOTE_RESOURCE: 1359 /* 1360 * This notification is sent whenever the MAC resources 1361 * change. We need to renegotiate the capabilities. 1362 * Send the appropriate DL_NOTIFY_IND. 1363 */ 1364 str_notify_capab_reneg(dsp); 1365 break; 1366 1367 default: 1368 ASSERT(B_FALSE); 1369 break; 1370 } 1371 } 1372 1373 /* 1374 * Enqueue one or more messages to the transmit queue. 1375 * Caller specifies the insertion position (head/tail). 1376 */ 1377 void 1378 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) 1379 { 1380 mblk_t *tail; 1381 queue_t *q = dsp->ds_wq; 1382 uint_t cnt, msgcnt; 1383 uint_t tot_cnt, tot_msgcnt; 1384 1385 ASSERT(DB_TYPE(mp) == M_DATA); 1386 /* Calculate total size and count of the packet(s) */ 1387 for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; 1388 tail->b_next != NULL; tail = tail->b_next) { 1389 ASSERT(DB_TYPE(tail) == M_DATA); 1390 cnt += msgdsize(tail); 1391 msgcnt++; 1392 } 1393 1394 mutex_enter(&dsp->ds_tx_list_lock); 1395 /* 1396 * If the queue depth would exceed the allowed threshold, drop 1397 * new packet(s) and drain those already in the queue. 
	 */
	tot_cnt = dsp->ds_tx_cnt + cnt;
	tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt;

	if (!head_insert &&
	    (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) {
		/* Over threshold: drop the new chain, keep the backlog. */
		ASSERT(dsp->ds_tx_qbusy);
		mutex_exit(&dsp->ds_tx_list_lock);
		freemsgchain(mp);
		goto done;
	}

	/* Update the queue size parameters */
	dsp->ds_tx_cnt = tot_cnt;
	dsp->ds_tx_msgcnt = tot_msgcnt;

	/*
	 * If the transmit queue is currently empty and we are
	 * about to deposit the packet(s) there, switch mode to
	 * "busy" and raise flow-control condition.  Putting the dummy
	 * flow-control message on the write-side STREAMS queue marks
	 * it full so upstream modules stop sending.
	 */
	if (!dsp->ds_tx_qbusy) {
		dsp->ds_tx_qbusy = B_TRUE;
		ASSERT(dsp->ds_tx_flow_mp != NULL);
		(void) putq(q, dsp->ds_tx_flow_mp);
		dsp->ds_tx_flow_mp = NULL;
	}

	if (!head_insert) {
		/* Tail insertion */
		if (dsp->ds_tx_list_head == NULL)
			dsp->ds_tx_list_head = mp;
		else
			dsp->ds_tx_list_tail->b_next = mp;
		dsp->ds_tx_list_tail = tail;
	} else {
		/* Head insertion */
		tail->b_next = dsp->ds_tx_list_head;
		if (dsp->ds_tx_list_head == NULL)
			dsp->ds_tx_list_tail = tail;
		dsp->ds_tx_list_head = mp;
	}
	mutex_exit(&dsp->ds_tx_list_lock);
done:
	/* Schedule service thread to drain the transmit queue */
	qenable(q);
}

/*
 * Flush the transmit queue: free all queued packets and, if the
 * stream was flow-controlled, reclaim the dummy flow-control message
 * so the write-side queue is no longer marked full.
 */
void
dld_tx_flush(dld_str_t *dsp)
{
	mutex_enter(&dsp->ds_tx_list_lock);
	if (dsp->ds_tx_list_head != NULL) {
		freemsgchain(dsp->ds_tx_list_head);
		dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL;
		dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0;
		if (dsp->ds_tx_qbusy) {
			/* Take back the flow-control mblk put by enqueue. */
			dsp->ds_tx_flow_mp = getq(dsp->ds_wq);
			ASSERT(dsp->ds_tx_flow_mp != NULL);
			dsp->ds_tx_qbusy = B_FALSE;
		}
	}
	mutex_exit(&dsp->ds_tx_list_lock);
}

/*
 * Process an M_IOCTL message: dispatch the two ioctls handled
 * directly by this driver, pass everything else to the catch-all.
 */
static void
dld_ioc(dld_str_t *dsp, mblk_t *mp)
{
	uint_t	cmd;

	cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd;
	ASSERT(dsp->ds_type == DLD_DLPI);

	switch (cmd) {
	case DLIOCRAW:
		ioc_raw(dsp, mp);
		break;
	case DLIOCHDRINFO:
		ioc_fast(dsp, mp);
		break;
	default:
		ioc(dsp, mp);
	}
}

/*
 * DLIOCRAW: switch the stream into raw mode.
 */
static void
ioc_raw(dld_str_t *dsp, mblk_t *mp)
{
	queue_t *q = dsp->ds_wq;

	rw_enter(&dsp->ds_lock, RW_WRITER);
	/* Raw mode cannot be enabled while polling is in effect. */
	if (dsp->ds_polling) {
		rw_exit(&dsp->ds_lock);
		miocnak(q, mp, 0, EPROTO);
		return;
	}

	if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) {
		/*
		 * Set the receive callback.
		 */
		dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp);

		/*
		 * Note that raw mode is enabled.
		 */
		dsp->ds_mode = DLD_RAW;
	}

	rw_exit(&dsp->ds_lock);
	miocack(q, mp, 0, 0);
}

/*
 * DLIOCHDRINFO: fast-path header request.  Validates the enclosed
 * DL_UNITDATA_REQ, generates a pre-built header for the destination,
 * and (first time through) switches the stream into fast-path mode.
 */
static void
ioc_fast(dld_str_t *dsp, mblk_t *mp)
{
	dl_unitdata_req_t *dlp;
	off_t		off;
	size_t		len;
	const uint8_t	*addr;
	uint16_t	sap;
	mblk_t		*nmp;
	mblk_t		*hmp;
	uint_t		addr_length;
	queue_t		*q = dsp->ds_wq;
	int		err;
	dls_channel_t	dc;

	if (dld_opt & DLD_OPT_NO_FASTPATH) {
		err = ENOTSUP;
		goto failed;
	}

	/* The ioctl payload must hold a well-formed DL_UNITDATA_REQ. */
	nmp = mp->b_cont;
	if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) ||
	    (dlp = (dl_unitdata_req_t *)nmp->b_rptr,
	    dlp->dl_primitive != DL_UNITDATA_REQ)) {
		err = EINVAL;
		goto failed;
	}

	off = dlp->dl_dest_addr_offset;
	len = dlp->dl_dest_addr_length;

	/* The destination address must lie entirely within the mblk. */
	if (!MBLKIN(nmp, off, len)) {
		err = EINVAL;
		goto failed;
	}

	rw_enter(&dsp->ds_lock, RW_READER);
	if (dsp->ds_dlstate != DL_IDLE) {
		rw_exit(&dsp->ds_lock);
		err = ENOTSUP;
		goto failed;
	}
	addr_length = dsp->ds_mip->mi_addr_length;
	/* Destination is the MAC address followed by a 16-bit SAP. */
	if (len != addr_length + sizeof (uint16_t)) {
		rw_exit(&dsp->ds_lock);
		err = EINVAL;
		goto failed;
	}

	addr = nmp->b_rptr + off;
	sap = *(uint16_t *)(nmp->b_rptr + off + addr_length);
	dc = dsp->ds_dc;

	/* Build the link-layer header to hand back to the consumer. */
	if ((hmp = dls_header(dc, addr, sap, dsp->ds_pri)) == NULL) {
		rw_exit(&dsp->ds_lock);
		err = ENOMEM;
		goto failed;
	}

	/*
	 * This is a performance optimization. We originally entered
	 * as reader and only become writer upon transitioning into
	 * the DLD_FASTPATH mode for the first time. Otherwise we
	 * stay as reader and return the fast-path header to IP.
	 */
	if (dsp->ds_mode != DLD_FASTPATH) {
		if (!rw_tryupgrade(&dsp->ds_lock)) {
			rw_exit(&dsp->ds_lock);
			rw_enter(&dsp->ds_lock, RW_WRITER);

			/*
			 * State may have changed before we re-acquired
			 * the writer lock in case the upgrade failed.
			 */
			if (dsp->ds_dlstate != DL_IDLE) {
				rw_exit(&dsp->ds_lock);
				err = ENOTSUP;
				goto failed;
			}
		}

		/*
		 * Set the receive callback (unless polling is enabled).
		 */
		if (!dsp->ds_polling)
			dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp);

		/*
		 * Note that fast-path mode is enabled.
		 */
		dsp->ds_mode = DLD_FASTPATH;
	}
	rw_exit(&dsp->ds_lock);

	/* Replace the request payload with the generated header. */
	freemsg(nmp->b_cont);
	nmp->b_cont = hmp;

	miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0);
	return;
failed:
	miocnak(q, mp, 0, err);
}

/*
 * Catch-all handler: forward any ioctl not handled by this driver
 * to the MAC driver.
 */
static void
ioc(dld_str_t *dsp, mblk_t *mp)
{
	queue_t	*q = dsp->ds_wq;
	mac_handle_t mh;

	rw_enter(&dsp->ds_lock, RW_READER);
	if (dsp->ds_dlstate == DL_UNATTACHED) {
		rw_exit(&dsp->ds_lock);
		miocnak(q, mp, 0, EINVAL);
		return;
	}
	mh = dsp->ds_mh;
	ASSERT(mh != NULL);
	rw_exit(&dsp->ds_lock);
	mac_ioctl(mh, q, mp);
}

/*
 * Allocate a new minor number.  Returns 0 on failure (only possible
 * when sleep is B_FALSE).
 */
static minor_t
dld_minor_hold(boolean_t sleep)
{
	minor_t		minor;

	/*
	 * Grab a value from the arena.
	 */
	atomic_add_32(&minor_count, 1);
	if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1,
	    (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) {
		/* Allocation failed: back out the count. */
		atomic_add_32(&minor_count, -1);
		return (0);
	}

	return (minor);
}

/*
 * Release a previously allocated minor number.
 */
static void
dld_minor_rele(minor_t minor)
{
	/*
	 * Return the value to the arena.
	 */
	vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1);

	atomic_add_32(&minor_count, -1);
}