1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Data-Link Driver 31 */ 32 33 #include <sys/stropts.h> 34 #include <sys/strsun.h> 35 #include <sys/strsubr.h> 36 #include <sys/atomic.h> 37 #include <sys/mkdev.h> 38 #include <sys/vlan.h> 39 #include <sys/dld.h> 40 #include <sys/dld_impl.h> 41 #include <sys/dls_impl.h> 42 #include <inet/common.h> 43 44 static int str_constructor(void *, void *, int); 45 static void str_destructor(void *, void *); 46 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *); 47 static void str_notify_promisc_on_phys(dld_str_t *); 48 static void str_notify_promisc_off_phys(dld_str_t *); 49 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 50 static void str_notify_link_up(dld_str_t *); 51 static void str_notify_link_down(dld_str_t *); 52 static void str_notify_capab_reneg(dld_str_t *); 53 static void str_notify_speed(dld_str_t *, uint32_t); 54 static void str_notify(void *, mac_notify_type_t); 55 56 static void ioc_raw(dld_str_t *, mblk_t *); 57 
static void ioc_fast(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static minor_t dld_minor_hold(boolean_t); 61 static void dld_minor_rele(minor_t); 62 63 static uint32_t str_count; 64 static kmem_cache_t *str_cachep; 65 static vmem_t *minor_arenap; 66 static uint32_t minor_count; 67 68 #define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) 69 #define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) 70 71 /* 72 * Some notes on entry points, flow-control, queueing and locking: 73 * 74 * This driver exports the traditional STREAMS put entry point as well as 75 * the non-STREAMS fast-path transmit routine which is provided to IP via 76 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 77 * and data operations, while the fast-path routine deals only with M_DATA 78 * fast-path packets. Regardless of the entry point, all outbound packets 79 * will end up in str_mdata_fastpath_put(), where they will be delivered to 80 * the MAC driver. 81 * 82 * The transmit logic operates in two modes: a "not busy" mode where the 83 * packets will be delivered to the MAC for a send attempt, or "busy" mode 84 * where they will be enqueued in the internal queue because of flow-control. 85 * Flow-control happens when the MAC driver indicates the packets couldn't 86 * be transmitted due to lack of resources (e.g. running out of descriptors). 87 * In such case, the driver will place a dummy message on its write-side 88 * STREAMS queue so that the queue is marked as "full". Any subsequent 89 * packets arriving at the driver will be enqueued in the internal queue, 90 * which is drained in the context of the service thread that gets scheduled 91 * whenever the driver is in the "busy" mode. 
When all packets have been
 * successfully delivered by MAC and the internal queue is empty, it will
 * transition to the "not busy" mode by removing the dummy message from the
 * write-side STREAMS queue; in effect this will trigger backenabling.
 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due
 * to the above reasons.
 *
 * The driver implements an internal transmit queue independent of STREAMS.
 * This allows for flexibility and provides a fast enqueue/dequeue mechanism
 * compared to the putq() and getq() STREAMS interfaces.  The only putq()
 * and getq() operations done by the driver are those related to placing
 * and removing the dummy message to/from the write-side STREAMS queue for
 * flow-control purposes.
 *
 * Locking is done independent of STREAMS due to the driver being fully MT.
 * Threads entering the driver (either from put or service entry points)
 * will most likely be readers, with the exception of a few writer cases
 * such as those handling DLPI attach/detach/bind/unbind/etc. or any of the
 * DLD-related ioctl requests.  The DLPI detach case is special, because
 * it involves freeing resources and therefore must be single-threaded.
 * Unfortunately the readers/writers lock can't be used to protect against
 * it, because the lock is dropped prior to the driver calling places where
 * putnext() may be invoked, and such places may depend on those resources
 * to exist.  Because of this, the driver always completes the DLPI detach
 * process when there are no other threads running in the driver.  This is
 * done by keeping track of the number of threads, such that the last
 * thread leaving the driver will finish the pending DLPI detach operation.
118 */ 119 120 /* 121 * dld_max_q_count is the queue depth threshold used to limit the number of 122 * outstanding packets or bytes allowed in the queue; once this limit is 123 * reached the driver will free any incoming ones until the queue depth 124 * drops below the threshold. 125 * 126 * This buffering is provided to accomodate clients which do not employ 127 * their own buffering scheme, and to handle occasional packet bursts. 128 * Clients which handle their own buffering will receive positive feedback 129 * from this driver as soon as it transitions into the "busy" state, i.e. 130 * when the queue is initially filled up; they will get backenabled once 131 * the queue is empty. 132 * 133 * The value chosen here is rather arbitrary; in future some intelligent 134 * heuristics may be involved which could take into account the hardware's 135 * transmit ring size, etc. 136 */ 137 uint_t dld_max_q_count = (16 * 1024 *1024); 138 139 static dev_info_t * 140 dld_finddevinfo(dev_t dev) 141 { 142 minor_t minor = getminor(dev); 143 char *drvname = ddi_major_to_name(getmajor(dev)); 144 char name[MAXNAMELEN]; 145 dls_vlan_t *dvp = NULL; 146 dev_info_t *dip = NULL; 147 148 if (drvname == NULL || minor == 0 || minor > DLD_MAX_PPA + 1) 149 return (NULL); 150 151 (void) snprintf(name, MAXNAMELEN, "%s%d", drvname, (int)minor - 1); 152 if (dls_vlan_hold(name, &dvp, B_FALSE) != 0) 153 return (NULL); 154 155 dip = mac_devinfo_get(dvp->dv_dlp->dl_mh); 156 dls_vlan_rele(dvp); 157 return (dip); 158 } 159 160 /* 161 * devo_getinfo: getinfo(9e) 162 */ 163 /*ARGSUSED*/ 164 int 165 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 166 { 167 dev_info_t *devinfo; 168 minor_t minor = getminor((dev_t)arg); 169 int rc = DDI_FAILURE; 170 171 switch (cmd) { 172 case DDI_INFO_DEVT2DEVINFO: 173 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 174 *(dev_info_t **)resp = devinfo; 175 rc = DDI_SUCCESS; 176 } 177 break; 178 case DDI_INFO_DEVT2INSTANCE: 179 if (minor > 0 
&& minor <= DLD_MAX_PPA + 1) { 180 *(int *)resp = (int)minor - 1; 181 rc = DDI_SUCCESS; 182 } 183 break; 184 } 185 return (rc); 186 } 187 188 /* 189 * qi_qopen: open(9e) 190 */ 191 /*ARGSUSED*/ 192 int 193 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 194 { 195 dld_str_t *dsp; 196 major_t major; 197 minor_t minor; 198 int err; 199 200 if (sflag == MODOPEN) 201 return (ENOTSUP); 202 203 /* 204 * This is a cloning driver and therefore each queue should only 205 * ever get opened once. 206 */ 207 if (rq->q_ptr != NULL) 208 return (EBUSY); 209 210 major = getmajor(*devp); 211 minor = getminor(*devp); 212 if (minor > DLD_MAX_MINOR) 213 return (ENODEV); 214 215 /* 216 * Create a new dld_str_t for the stream. This will grab a new minor 217 * number that will be handed back in the cloned dev_t. Creation may 218 * fail if we can't allocate the dummy mblk used for flow-control. 219 */ 220 dsp = dld_str_create(rq, DLD_DLPI, major, 221 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 222 if (dsp == NULL) 223 return (ENOSR); 224 225 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 226 if (minor != 0) { 227 /* 228 * Style 1 open 229 */ 230 231 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 232 goto failed; 233 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 234 } else { 235 (void) qassociate(rq, -1); 236 } 237 238 /* 239 * Enable the queue srv(9e) routine. 240 */ 241 qprocson(rq); 242 243 /* 244 * Construct a cloned dev_t to hand back. 245 */ 246 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 247 return (0); 248 249 failed: 250 dld_str_destroy(dsp); 251 return (err); 252 } 253 254 /* 255 * qi_qclose: close(9e) 256 */ 257 int 258 dld_close(queue_t *rq) 259 { 260 dld_str_t *dsp = rq->q_ptr; 261 262 ASSERT(dsp->ds_task_id == NULL); 263 264 /* 265 * Disable the queue srv(9e) routine. 266 */ 267 qprocsoff(rq); 268 269 /* 270 * At this point we can not be entered by any threads via STREAMS 271 * or the direct call interface, which is available only to IP. 
272 * After the interface is unplumbed, IP wouldn't have any reference 273 * to this instance, and therefore we are now effectively single 274 * threaded and don't require any lock protection. Flush all 275 * pending packets which are sitting in the transmit queue. 276 */ 277 ASSERT(dsp->ds_thr == 0); 278 dld_tx_flush(dsp); 279 280 /* 281 * This stream was open to a provider node. Check to see 282 * if it has been cleanly shut down. 283 */ 284 if (dsp->ds_dlstate != DL_UNATTACHED) { 285 /* 286 * The stream is either open to a style 1 provider or 287 * this is not clean shutdown. Detach from the PPA. 288 * (This is still ok even in the style 1 case). 289 */ 290 dld_str_detach(dsp); 291 } 292 293 dld_str_destroy(dsp); 294 return (0); 295 } 296 297 /* 298 * qi_qputp: put(9e) 299 */ 300 void 301 dld_wput(queue_t *wq, mblk_t *mp) 302 { 303 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 304 305 DLD_ENTER(dsp); 306 307 switch (DB_TYPE(mp)) { 308 case M_DATA: 309 rw_enter(&dsp->ds_lock, RW_READER); 310 if (dsp->ds_dlstate != DL_IDLE || 311 dsp->ds_mode == DLD_UNITDATA) { 312 freemsg(mp); 313 } else if (dsp->ds_mode == DLD_FASTPATH) { 314 str_mdata_fastpath_put(dsp, mp); 315 } else if (dsp->ds_mode == DLD_RAW) { 316 str_mdata_raw_put(dsp, mp); 317 } 318 rw_exit(&dsp->ds_lock); 319 break; 320 case M_PROTO: 321 case M_PCPROTO: 322 dld_proto(dsp, mp); 323 break; 324 case M_IOCTL: 325 dld_ioc(dsp, mp); 326 break; 327 case M_FLUSH: 328 if (*mp->b_rptr & FLUSHW) { 329 dld_tx_flush(dsp); 330 *mp->b_rptr &= ~FLUSHW; 331 } 332 333 if (*mp->b_rptr & FLUSHR) { 334 qreply(wq, mp); 335 } else { 336 freemsg(mp); 337 } 338 break; 339 default: 340 freemsg(mp); 341 break; 342 } 343 344 DLD_EXIT(dsp); 345 } 346 347 /* 348 * qi_srvp: srv(9e) 349 */ 350 void 351 dld_wsrv(queue_t *wq) 352 { 353 mblk_t *mp; 354 dld_str_t *dsp = wq->q_ptr; 355 356 DLD_ENTER(dsp); 357 rw_enter(&dsp->ds_lock, RW_READER); 358 /* 359 * Grab all packets (chained via b_next) off our transmit queue 360 * and try to send them 
all to the MAC layer. Since the queue 361 * is independent of streams, we are able to dequeue all messages 362 * at once without looping through getq() and manually chaining 363 * them. Note that the queue size parameters (byte and message 364 * counts) are cleared as well, but we postpone the backenabling 365 * until after the MAC transmit since some packets may end up 366 * back at our transmit queue. 367 */ 368 mutex_enter(&dsp->ds_tx_list_lock); 369 if ((mp = dsp->ds_tx_list_head) == NULL) { 370 ASSERT(!dsp->ds_tx_qbusy); 371 ASSERT(dsp->ds_tx_flow_mp != NULL); 372 ASSERT(dsp->ds_tx_list_head == NULL); 373 ASSERT(dsp->ds_tx_list_tail == NULL); 374 ASSERT(dsp->ds_tx_cnt == 0); 375 ASSERT(dsp->ds_tx_msgcnt == 0); 376 mutex_exit(&dsp->ds_tx_list_lock); 377 goto done; 378 } 379 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 380 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 381 mutex_exit(&dsp->ds_tx_list_lock); 382 383 /* 384 * Discard packets unless we are attached and bound; note that 385 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 386 * because regardless of the mode all transmit will end up in 387 * str_mdata_fastpath_put() where the packets may be queued. 388 */ 389 ASSERT(DB_TYPE(mp) == M_DATA); 390 if (dsp->ds_dlstate != DL_IDLE) { 391 freemsgchain(mp); 392 goto done; 393 } 394 395 /* 396 * Attempt to transmit one or more packets. If the MAC can't 397 * send them all, re-queue the packet(s) at the beginning of 398 * the transmit queue to avoid any re-ordering. 399 */ 400 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 401 dld_tx_enqueue(dsp, mp, B_TRUE); 402 403 /* 404 * Grab the list lock again and check if the transmit queue is 405 * really empty; if so, lift up flow-control and backenable any 406 * writer queues. If the queue is not empty, schedule service 407 * thread to drain it. 
408 */ 409 mutex_enter(&dsp->ds_tx_list_lock); 410 if (dsp->ds_tx_list_head == NULL) { 411 dsp->ds_tx_flow_mp = getq(wq); 412 ASSERT(dsp->ds_tx_flow_mp != NULL); 413 dsp->ds_tx_qbusy = B_FALSE; 414 } 415 mutex_exit(&dsp->ds_tx_list_lock); 416 done: 417 rw_exit(&dsp->ds_lock); 418 DLD_EXIT(dsp); 419 } 420 421 void 422 dld_init_ops(struct dev_ops *ops, const char *name) 423 { 424 struct streamtab *stream; 425 struct qinit *rq, *wq; 426 struct module_info *modinfo; 427 428 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 429 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 430 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 431 modinfo->mi_minpsz = 0; 432 modinfo->mi_maxpsz = 64*1024; 433 modinfo->mi_hiwat = 1; 434 modinfo->mi_lowat = 0; 435 436 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 437 rq->qi_qopen = dld_open; 438 rq->qi_qclose = dld_close; 439 rq->qi_minfo = modinfo; 440 441 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 442 wq->qi_putp = (pfi_t)dld_wput; 443 wq->qi_srvp = (pfi_t)dld_wsrv; 444 wq->qi_minfo = modinfo; 445 446 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 447 stream->st_rdinit = rq; 448 stream->st_wrinit = wq; 449 ops->devo_cb_ops->cb_str = stream; 450 451 ops->devo_getinfo = &dld_getinfo; 452 } 453 454 void 455 dld_fini_ops(struct dev_ops *ops) 456 { 457 struct streamtab *stream; 458 struct qinit *rq, *wq; 459 struct module_info *modinfo; 460 461 stream = ops->devo_cb_ops->cb_str; 462 rq = stream->st_rdinit; 463 wq = stream->st_wrinit; 464 modinfo = rq->qi_minfo; 465 ASSERT(wq->qi_minfo == modinfo); 466 467 kmem_free(stream, sizeof (struct streamtab)); 468 kmem_free(wq, sizeof (struct qinit)); 469 kmem_free(rq, sizeof (struct qinit)); 470 kmem_free(modinfo->mi_idname, FMNAMESZ); 471 kmem_free(modinfo, sizeof (struct module_info)); 472 } 473 474 /* 475 * Initialize this module's data structures. 476 */ 477 void 478 dld_str_init(void) 479 { 480 /* 481 * Create dld_str_t object cache. 
482 */ 483 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 484 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 485 ASSERT(str_cachep != NULL); 486 487 /* 488 * Allocate a vmem arena to manage minor numbers. The range of the 489 * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal 490 * minor number). 491 */ 492 minor_arenap = vmem_create("dld_minor_arena", 493 MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, 494 VM_SLEEP | VMC_IDENTIFIER); 495 ASSERT(minor_arenap != NULL); 496 } 497 498 /* 499 * Tear down this module's data structures. 500 */ 501 int 502 dld_str_fini(void) 503 { 504 /* 505 * Make sure that there are no objects in use. 506 */ 507 if (str_count != 0) 508 return (EBUSY); 509 510 /* 511 * Check to see if there are any minor numbers still in use. 512 */ 513 if (minor_count != 0) 514 return (EBUSY); 515 516 /* 517 * Destroy object cache. 518 */ 519 kmem_cache_destroy(str_cachep); 520 vmem_destroy(minor_arenap); 521 return (0); 522 } 523 524 /* 525 * Create a new dld_str_t object. 526 */ 527 dld_str_t * 528 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 529 { 530 dld_str_t *dsp; 531 532 /* 533 * Allocate an object from the cache. 534 */ 535 atomic_add_32(&str_count, 1); 536 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 537 538 /* 539 * Allocate the dummy mblk for flow-control. 540 */ 541 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 542 if (dsp->ds_tx_flow_mp == NULL) { 543 kmem_cache_free(str_cachep, dsp); 544 atomic_add_32(&str_count, -1); 545 return (NULL); 546 } 547 dsp->ds_type = type; 548 dsp->ds_major = major; 549 dsp->ds_style = style; 550 551 /* 552 * Initialize the queue pointers. 553 */ 554 ASSERT(RD(rq) == rq); 555 dsp->ds_rq = rq; 556 dsp->ds_wq = WR(rq); 557 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 558 559 /* 560 * We want explicit control over our write-side STREAMS queue 561 * where the dummy mblk gets added/removed for flow-control. 
562 */ 563 noenable(WR(rq)); 564 565 return (dsp); 566 } 567 568 /* 569 * Destroy a dld_str_t object. 570 */ 571 void 572 dld_str_destroy(dld_str_t *dsp) 573 { 574 queue_t *rq; 575 queue_t *wq; 576 577 /* 578 * Clear the queue pointers. 579 */ 580 rq = dsp->ds_rq; 581 wq = dsp->ds_wq; 582 ASSERT(wq == WR(rq)); 583 584 rq->q_ptr = wq->q_ptr = NULL; 585 dsp->ds_rq = dsp->ds_wq = NULL; 586 587 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 588 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 589 ASSERT(dsp->ds_tx_list_head == NULL); 590 ASSERT(dsp->ds_tx_list_tail == NULL); 591 ASSERT(dsp->ds_tx_cnt == 0); 592 ASSERT(dsp->ds_tx_msgcnt == 0); 593 ASSERT(!dsp->ds_tx_qbusy); 594 595 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 596 ASSERT(dsp->ds_thr == 0); 597 ASSERT(dsp->ds_detach_req == NULL); 598 599 /* 600 * Reinitialize all the flags. 601 */ 602 dsp->ds_notifications = 0; 603 dsp->ds_passivestate = DLD_UNINITIALIZED; 604 dsp->ds_mode = DLD_UNITDATA; 605 606 /* 607 * Free the dummy mblk if exists. 608 */ 609 if (dsp->ds_tx_flow_mp != NULL) { 610 freeb(dsp->ds_tx_flow_mp); 611 dsp->ds_tx_flow_mp = NULL; 612 } 613 /* 614 * Free the object back to the cache. 615 */ 616 kmem_cache_free(str_cachep, dsp); 617 atomic_add_32(&str_count, -1); 618 } 619 620 /* 621 * kmem_cache contructor function: see kmem_cache_create(9f). 622 */ 623 /*ARGSUSED*/ 624 static int 625 str_constructor(void *buf, void *cdrarg, int kmflags) 626 { 627 dld_str_t *dsp = buf; 628 629 bzero(buf, sizeof (dld_str_t)); 630 631 /* 632 * Allocate a new minor number. 633 */ 634 if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0) 635 return (-1); 636 637 /* 638 * Initialize the DLPI state machine. 639 */ 640 dsp->ds_dlstate = DL_UNATTACHED; 641 642 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 643 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 644 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 645 646 return (0); 647 } 648 649 /* 650 * kmem_cache destructor function. 
651 */ 652 /*ARGSUSED*/ 653 static void 654 str_destructor(void *buf, void *cdrarg) 655 { 656 dld_str_t *dsp = buf; 657 658 /* 659 * Make sure the DLPI state machine was reset. 660 */ 661 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 662 663 /* 664 * Make sure the data-link interface was closed. 665 */ 666 ASSERT(dsp->ds_mh == NULL); 667 ASSERT(dsp->ds_dc == NULL); 668 669 /* 670 * Make sure enabled notifications are cleared. 671 */ 672 ASSERT(dsp->ds_notifications == 0); 673 674 /* 675 * Make sure polling is disabled. 676 */ 677 ASSERT(!dsp->ds_polling); 678 679 /* 680 * Release the minor number. 681 */ 682 dld_minor_rele(dsp->ds_minor); 683 684 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 685 rw_destroy(&dsp->ds_lock); 686 687 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 688 mutex_destroy(&dsp->ds_tx_list_lock); 689 ASSERT(dsp->ds_tx_flow_mp == NULL); 690 691 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 692 mutex_destroy(&dsp->ds_thr_lock); 693 ASSERT(dsp->ds_detach_req == NULL); 694 } 695 696 /* 697 * M_DATA put (IP fast-path mode) 698 */ 699 void 700 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 701 { 702 /* 703 * This function can be called from within dld or from an upper 704 * layer protocol (currently only tcp). If we are in the busy 705 * mode enqueue the packet(s) and return. Otherwise hand them 706 * over to the MAC driver for transmission; any remaining one(s) 707 * which didn't get sent will be queued. 708 * 709 * Note here that we don't grab the list lock prior to checking 710 * the busy flag. This is okay, because a missed transition 711 * will not cause any packet reordering for any particular TCP 712 * connection (which is single-threaded). The enqueue routine 713 * will atomically set the busy flag and schedule the service 714 * thread to run; the flag is only cleared by the service thread 715 * when there is no more packet to be transmitted. 
716 */ 717 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 718 dld_tx_enqueue(dsp, mp, B_FALSE); 719 } 720 721 /* 722 * M_DATA put (raw mode) 723 */ 724 void 725 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 726 { 727 struct ether_header *ehp; 728 mblk_t *bp; 729 size_t size; 730 size_t hdrlen; 731 732 size = MBLKL(mp); 733 if (size < sizeof (struct ether_header)) 734 goto discard; 735 736 hdrlen = sizeof (struct ether_header); 737 738 ehp = (struct ether_header *)mp->b_rptr; 739 if (ntohs(ehp->ether_type) == VLAN_TPID) { 740 struct ether_vlan_header *evhp; 741 742 if (size < sizeof (struct ether_vlan_header)) 743 goto discard; 744 745 /* 746 * Replace vtag with our own 747 */ 748 evhp = (struct ether_vlan_header *)ehp; 749 evhp->ether_tci = htons(VLAN_TCI(dsp->ds_pri, 750 ETHER_CFI, dsp->ds_vid)); 751 hdrlen = sizeof (struct ether_vlan_header); 752 } 753 754 /* 755 * Check the packet is not too big and that any remaining 756 * fragment list is composed entirely of M_DATA messages. (We 757 * know the first fragment was M_DATA otherwise we could not 758 * have got here). 759 */ 760 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 761 if (DB_TYPE(bp) != M_DATA) 762 goto discard; 763 size += MBLKL(bp); 764 } 765 766 if (size > dsp->ds_mip->mi_sdu_max + hdrlen) 767 goto discard; 768 769 str_mdata_fastpath_put(dsp, mp); 770 return; 771 772 discard: 773 freemsg(mp); 774 } 775 776 /* 777 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 
778 */ 779 int 780 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 781 { 782 int err; 783 const char *drvname; 784 char name[MAXNAMELEN]; 785 dls_channel_t dc; 786 uint_t addr_length; 787 788 ASSERT(dsp->ds_dc == NULL); 789 790 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 791 return (EINVAL); 792 793 (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); 794 795 if (strcmp(drvname, "aggr") != 0 && 796 qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 797 return (EINVAL); 798 799 /* 800 * Open a channel. 801 */ 802 if ((err = dls_open(name, &dc)) != 0) { 803 (void) qassociate(dsp->ds_wq, -1); 804 return (err); 805 } 806 807 /* 808 * Cache the MAC interface handle, a pointer to the immutable MAC 809 * information and the current and 'factory' MAC address. 810 */ 811 dsp->ds_mh = dls_mac(dc); 812 dsp->ds_mip = mac_info(dsp->ds_mh); 813 814 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 815 816 addr_length = dsp->ds_mip->mi_addr_length; 817 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 818 819 /* 820 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 821 * a non-VLAN interface). 822 */ 823 dsp->ds_vid = dls_vid(dc); 824 825 /* 826 * Set the default packet priority. 827 */ 828 dsp->ds_pri = 0; 829 830 /* 831 * Add a notify function so that the we get updates from the MAC. 832 */ 833 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 834 835 dsp->ds_dc = dc; 836 dsp->ds_dlstate = DL_UNBOUND; 837 838 return (0); 839 } 840 841 /* 842 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 843 * from close(2) for style 2. 844 */ 845 void 846 dld_str_detach(dld_str_t *dsp) 847 { 848 ASSERT(dsp->ds_thr == 0); 849 850 /* 851 * Remove the notify function. 852 */ 853 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 854 855 /* 856 * Re-initialize the DLPI state machine. 857 */ 858 dsp->ds_dlstate = DL_UNATTACHED; 859 860 /* 861 * Clear the polling and promisc flags. 
862 */ 863 dsp->ds_polling = B_FALSE; 864 dsp->ds_soft_ring = B_FALSE; 865 dsp->ds_promisc = 0; 866 867 /* 868 * Close the channel. 869 */ 870 dls_close(dsp->ds_dc); 871 dsp->ds_dc = NULL; 872 dsp->ds_mh = NULL; 873 874 (void) qassociate(dsp->ds_wq, -1); 875 } 876 877 /* 878 * Raw mode receive function. 879 */ 880 /*ARGSUSED*/ 881 void 882 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 883 size_t header_length) 884 { 885 dld_str_t *dsp = (dld_str_t *)arg; 886 mblk_t *next; 887 888 ASSERT(mp != NULL); 889 do { 890 /* 891 * Get the pointer to the next packet in the chain and then 892 * clear b_next before the packet gets passed on. 893 */ 894 next = mp->b_next; 895 mp->b_next = NULL; 896 897 /* 898 * Wind back b_rptr to point at the MAC header. 899 */ 900 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 901 mp->b_rptr -= header_length; 902 if (header_length == sizeof (struct ether_vlan_header)) { 903 /* 904 * Strip off the vtag 905 */ 906 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 907 2 * ETHERADDRL); 908 mp->b_rptr += VLAN_TAGSZ; 909 } 910 911 /* 912 * Pass the packet on. 913 */ 914 putnext(dsp->ds_rq, mp); 915 916 /* 917 * Move on to the next packet in the chain. 918 */ 919 mp = next; 920 } while (mp != NULL); 921 } 922 923 /* 924 * Fast-path receive function. 925 */ 926 /*ARGSUSED*/ 927 void 928 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 929 size_t header_length) 930 { 931 dld_str_t *dsp = (dld_str_t *)arg; 932 mblk_t *next; 933 934 ASSERT(mp != NULL); 935 do { 936 /* 937 * Get the pointer to the next packet in the chain and then 938 * clear b_next before the packet gets passed on. 939 */ 940 next = mp->b_next; 941 mp->b_next = NULL; 942 943 /* 944 * Pass the packet on. 945 */ 946 putnext(dsp->ds_rq, mp); 947 948 /* 949 * Move on to the next packet in the chain. 950 */ 951 mp = next; 952 } while (mp != NULL); 953 } 954 955 /* 956 * Default receive function (send DL_UNITDATA_IND messages). 
957 */ 958 /*ARGSUSED*/ 959 void 960 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 961 size_t header_length) 962 { 963 dld_str_t *dsp = (dld_str_t *)arg; 964 mblk_t *ud_mp; 965 mblk_t *next; 966 967 ASSERT(mp != NULL); 968 do { 969 /* 970 * Get the pointer to the next packet in the chain and then 971 * clear b_next before the packet gets passed on. 972 */ 973 next = mp->b_next; 974 mp->b_next = NULL; 975 976 /* 977 * Wind back b_rptr to point at the MAC header. 978 */ 979 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 980 mp->b_rptr -= header_length; 981 982 /* 983 * Create the DL_UNITDATA_IND M_PROTO. 984 */ 985 if ((ud_mp = str_unitdata_ind(dsp, mp)) == NULL) { 986 freemsgchain(mp); 987 return; 988 } 989 990 /* 991 * Advance b_rptr to point at the payload again. 992 */ 993 mp->b_rptr += header_length; 994 995 /* 996 * Prepend the DL_UNITDATA_IND. 997 */ 998 ud_mp->b_cont = mp; 999 1000 /* 1001 * Send the message. 1002 */ 1003 putnext(dsp->ds_rq, ud_mp); 1004 1005 /* 1006 * Move on to the next packet in the chain. 1007 */ 1008 mp = next; 1009 } while (mp != NULL); 1010 } 1011 1012 /* 1013 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1014 * current state of the interface. 1015 */ 1016 void 1017 dld_str_notify_ind(dld_str_t *dsp) 1018 { 1019 mac_notify_type_t type; 1020 1021 for (type = 0; type < MAC_NNOTE; type++) 1022 str_notify(dsp, type); 1023 } 1024 1025 typedef struct dl_unitdata_ind_wrapper { 1026 dl_unitdata_ind_t dl_unitdata; 1027 uint8_t dl_dest_addr[MAXADDRLEN + sizeof (uint16_t)]; 1028 uint8_t dl_src_addr[MAXADDRLEN + sizeof (uint16_t)]; 1029 } dl_unitdata_ind_wrapper_t; 1030 1031 /* 1032 * Create a DL_UNITDATA_IND M_PROTO message. 
1033 */ 1034 static mblk_t * 1035 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp) 1036 { 1037 mblk_t *nmp; 1038 dl_unitdata_ind_wrapper_t *dlwp; 1039 dl_unitdata_ind_t *dlp; 1040 dls_header_info_t dhi; 1041 uint_t addr_length; 1042 uint8_t *daddr; 1043 uint8_t *saddr; 1044 1045 /* 1046 * Get the packet header information. 1047 */ 1048 dls_header_info(dsp->ds_dc, mp, &dhi); 1049 1050 /* 1051 * Allocate a message large enough to contain the wrapper structure 1052 * defined above. 1053 */ 1054 if ((nmp = mexchange(dsp->ds_wq, NULL, 1055 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1056 DL_UNITDATA_IND)) == NULL) 1057 return (NULL); 1058 1059 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1060 1061 dlp = &(dlwp->dl_unitdata); 1062 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1063 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1064 1065 /* 1066 * Copy in the destination address. 1067 */ 1068 addr_length = dsp->ds_mip->mi_addr_length; 1069 daddr = dlwp->dl_dest_addr; 1070 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1071 bcopy(dhi.dhi_daddr, daddr, addr_length); 1072 1073 /* 1074 * Set the destination DLSAP to our bound DLSAP value. 1075 */ 1076 *(uint16_t *)(daddr + addr_length) = dsp->ds_sap; 1077 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1078 1079 /* 1080 * If the destination address was a group address then 1081 * dl_group_address field should be non-zero. 1082 */ 1083 dlp->dl_group_address = dhi.dhi_isgroup; 1084 1085 /* 1086 * Copy in the source address. 1087 */ 1088 saddr = dlwp->dl_src_addr; 1089 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1090 bcopy(dhi.dhi_saddr, saddr, addr_length); 1091 1092 /* 1093 * Set the source DLSAP to the packet ethertype. 
1094 */ 1095 *(uint16_t *)(saddr + addr_length) = dhi.dhi_ethertype; 1096 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1097 1098 return (nmp); 1099 } 1100 1101 /* 1102 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1103 */ 1104 static void 1105 str_notify_promisc_on_phys(dld_str_t *dsp) 1106 { 1107 mblk_t *mp; 1108 dl_notify_ind_t *dlip; 1109 1110 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1111 return; 1112 1113 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1114 M_PROTO, 0)) == NULL) 1115 return; 1116 1117 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1118 dlip = (dl_notify_ind_t *)mp->b_rptr; 1119 dlip->dl_primitive = DL_NOTIFY_IND; 1120 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1121 1122 qreply(dsp->ds_wq, mp); 1123 } 1124 1125 /* 1126 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1127 */ 1128 static void 1129 str_notify_promisc_off_phys(dld_str_t *dsp) 1130 { 1131 mblk_t *mp; 1132 dl_notify_ind_t *dlip; 1133 1134 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1135 return; 1136 1137 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1138 M_PROTO, 0)) == NULL) 1139 return; 1140 1141 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1142 dlip = (dl_notify_ind_t *)mp->b_rptr; 1143 dlip->dl_primitive = DL_NOTIFY_IND; 1144 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1145 1146 qreply(dsp->ds_wq, mp); 1147 } 1148 1149 /* 1150 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1151 */ 1152 static void 1153 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1154 { 1155 mblk_t *mp; 1156 dl_notify_ind_t *dlip; 1157 uint_t addr_length; 1158 uint16_t ethertype; 1159 1160 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1161 return; 1162 1163 addr_length = dsp->ds_mip->mi_addr_length; 1164 if ((mp = mexchange(dsp->ds_wq, NULL, 1165 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1166 M_PROTO, 0)) == NULL) 1167 return; 1168 1169 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1170 dlip = (dl_notify_ind_t 
*)mp->b_rptr; 1171 dlip->dl_primitive = DL_NOTIFY_IND; 1172 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1173 dlip->dl_data = DL_CURR_PHYS_ADDR; 1174 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1175 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1176 1177 bcopy(addr, &dlip[1], addr_length); 1178 1179 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1180 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = 1181 ethertype; 1182 1183 qreply(dsp->ds_wq, mp); 1184 } 1185 1186 /* 1187 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1188 */ 1189 static void 1190 str_notify_link_up(dld_str_t *dsp) 1191 { 1192 mblk_t *mp; 1193 dl_notify_ind_t *dlip; 1194 1195 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1196 return; 1197 1198 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1199 M_PROTO, 0)) == NULL) 1200 return; 1201 1202 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1203 dlip = (dl_notify_ind_t *)mp->b_rptr; 1204 dlip->dl_primitive = DL_NOTIFY_IND; 1205 dlip->dl_notification = DL_NOTE_LINK_UP; 1206 1207 qreply(dsp->ds_wq, mp); 1208 } 1209 1210 /* 1211 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1212 */ 1213 static void 1214 str_notify_link_down(dld_str_t *dsp) 1215 { 1216 mblk_t *mp; 1217 dl_notify_ind_t *dlip; 1218 1219 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1220 return; 1221 1222 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1223 M_PROTO, 0)) == NULL) 1224 return; 1225 1226 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1227 dlip = (dl_notify_ind_t *)mp->b_rptr; 1228 dlip->dl_primitive = DL_NOTIFY_IND; 1229 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1230 1231 qreply(dsp->ds_wq, mp); 1232 } 1233 1234 /* 1235 * DL_NOTIFY_IND: DL_NOTE_SPEED 1236 */ 1237 static void 1238 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1239 { 1240 mblk_t *mp; 1241 dl_notify_ind_t *dlip; 1242 1243 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1244 return; 1245 1246 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1247 M_PROTO, 
0)) == NULL) 1248 return; 1249 1250 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1251 dlip = (dl_notify_ind_t *)mp->b_rptr; 1252 dlip->dl_primitive = DL_NOTIFY_IND; 1253 dlip->dl_notification = DL_NOTE_SPEED; 1254 dlip->dl_data = speed; 1255 1256 qreply(dsp->ds_wq, mp); 1257 } 1258 1259 /* 1260 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1261 */ 1262 static void 1263 str_notify_capab_reneg(dld_str_t *dsp) 1264 { 1265 mblk_t *mp; 1266 dl_notify_ind_t *dlip; 1267 1268 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1269 return; 1270 1271 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1272 M_PROTO, 0)) == NULL) 1273 return; 1274 1275 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1276 dlip = (dl_notify_ind_t *)mp->b_rptr; 1277 dlip->dl_primitive = DL_NOTIFY_IND; 1278 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1279 1280 qreply(dsp->ds_wq, mp); 1281 } 1282 1283 /* 1284 * MAC notification callback. 1285 */ 1286 static void 1287 str_notify(void *arg, mac_notify_type_t type) 1288 { 1289 dld_str_t *dsp = (dld_str_t *)arg; 1290 queue_t *q = dsp->ds_wq; 1291 1292 switch (type) { 1293 case MAC_NOTE_TX: 1294 qenable(q); 1295 break; 1296 1297 case MAC_NOTE_DEVPROMISC: 1298 /* 1299 * Send the appropriate DL_NOTIFY_IND. 1300 */ 1301 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1302 str_notify_promisc_on_phys(dsp); 1303 else 1304 str_notify_promisc_off_phys(dsp); 1305 break; 1306 1307 case MAC_NOTE_PROMISC: 1308 break; 1309 1310 case MAC_NOTE_UNICST: 1311 /* 1312 * This notification is sent whenever the MAC unicast address 1313 * changes. We need to re-cache the address. 1314 */ 1315 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1316 1317 /* 1318 * Send the appropriate DL_NOTIFY_IND. 1319 */ 1320 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1321 break; 1322 1323 case MAC_NOTE_LINK: 1324 /* 1325 * This notification is sent every time the MAC driver 1326 * updates the link state. 
1327 */ 1328 switch (mac_link_get(dsp->ds_mh)) { 1329 case LINK_STATE_UP: 1330 /* 1331 * The link is up so send the appropriate 1332 * DL_NOTIFY_IND. 1333 */ 1334 str_notify_link_up(dsp); 1335 1336 /* 1337 * If we can find the link speed then send a 1338 * DL_NOTIFY_IND for that too. 1339 */ 1340 if (dsp->ds_mip->mi_stat[MAC_STAT_IFSPEED]) { 1341 uint64_t val; 1342 1343 val = mac_stat_get(dsp->ds_mh, 1344 MAC_STAT_IFSPEED); 1345 str_notify_speed(dsp, 1346 (uint32_t)(val / 1000ull)); 1347 } 1348 break; 1349 1350 case LINK_STATE_DOWN: 1351 /* 1352 * The link is down so send the appropriate 1353 * DL_NOTIFY_IND. 1354 */ 1355 str_notify_link_down(dsp); 1356 break; 1357 1358 default: 1359 break; 1360 } 1361 break; 1362 1363 case MAC_NOTE_RESOURCE: 1364 /* 1365 * This notification is sent whenever the MAC resources 1366 * change. We need to renegotiate the capabilities. 1367 * Send the appropriate DL_NOTIFY_IND. 1368 */ 1369 str_notify_capab_reneg(dsp); 1370 break; 1371 1372 default: 1373 ASSERT(B_FALSE); 1374 break; 1375 } 1376 } 1377 1378 /* 1379 * Enqueue one or more messages to the transmit queue. 1380 * Caller specifies the insertion position (head/tail). 1381 */ 1382 void 1383 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) 1384 { 1385 mblk_t *tail; 1386 queue_t *q = dsp->ds_wq; 1387 uint_t cnt, msgcnt; 1388 uint_t tot_cnt, tot_msgcnt; 1389 1390 ASSERT(DB_TYPE(mp) == M_DATA); 1391 /* Calculate total size and count of the packet(s) */ 1392 for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; 1393 tail->b_next != NULL; tail = tail->b_next) { 1394 ASSERT(DB_TYPE(tail) == M_DATA); 1395 cnt += msgdsize(tail); 1396 msgcnt++; 1397 } 1398 1399 mutex_enter(&dsp->ds_tx_list_lock); 1400 /* 1401 * If the queue depth would exceed the allowed threshold, drop 1402 * new packet(s) and drain those already in the queue. 
1403 */ 1404 tot_cnt = dsp->ds_tx_cnt + cnt; 1405 tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; 1406 1407 if (!head_insert && 1408 (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) { 1409 ASSERT(dsp->ds_tx_qbusy); 1410 mutex_exit(&dsp->ds_tx_list_lock); 1411 freemsgchain(mp); 1412 goto done; 1413 } 1414 1415 /* Update the queue size parameters */ 1416 dsp->ds_tx_cnt = tot_cnt; 1417 dsp->ds_tx_msgcnt = tot_msgcnt; 1418 1419 /* 1420 * If the transmit queue is currently empty and we are 1421 * about to deposit the packet(s) there, switch mode to 1422 * "busy" and raise flow-control condition. 1423 */ 1424 if (!dsp->ds_tx_qbusy) { 1425 dsp->ds_tx_qbusy = B_TRUE; 1426 ASSERT(dsp->ds_tx_flow_mp != NULL); 1427 (void) putq(q, dsp->ds_tx_flow_mp); 1428 dsp->ds_tx_flow_mp = NULL; 1429 } 1430 1431 if (!head_insert) { 1432 /* Tail insertion */ 1433 if (dsp->ds_tx_list_head == NULL) 1434 dsp->ds_tx_list_head = mp; 1435 else 1436 dsp->ds_tx_list_tail->b_next = mp; 1437 dsp->ds_tx_list_tail = tail; 1438 } else { 1439 /* Head insertion */ 1440 tail->b_next = dsp->ds_tx_list_head; 1441 if (dsp->ds_tx_list_head == NULL) 1442 dsp->ds_tx_list_tail = tail; 1443 dsp->ds_tx_list_head = mp; 1444 } 1445 mutex_exit(&dsp->ds_tx_list_lock); 1446 done: 1447 /* Schedule service thread to drain the transmit queue */ 1448 qenable(q); 1449 } 1450 1451 void 1452 dld_tx_flush(dld_str_t *dsp) 1453 { 1454 mutex_enter(&dsp->ds_tx_list_lock); 1455 if (dsp->ds_tx_list_head != NULL) { 1456 freemsgchain(dsp->ds_tx_list_head); 1457 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 1458 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 1459 if (dsp->ds_tx_qbusy) { 1460 dsp->ds_tx_flow_mp = getq(dsp->ds_wq); 1461 ASSERT(dsp->ds_tx_flow_mp != NULL); 1462 dsp->ds_tx_qbusy = B_FALSE; 1463 } 1464 } 1465 mutex_exit(&dsp->ds_tx_list_lock); 1466 } 1467 1468 /* 1469 * Process an M_IOCTL message. 
1470 */ 1471 static void 1472 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1473 { 1474 uint_t cmd; 1475 1476 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1477 ASSERT(dsp->ds_type == DLD_DLPI); 1478 1479 switch (cmd) { 1480 case DLIOCRAW: 1481 ioc_raw(dsp, mp); 1482 break; 1483 case DLIOCHDRINFO: 1484 ioc_fast(dsp, mp); 1485 break; 1486 default: 1487 ioc(dsp, mp); 1488 } 1489 } 1490 1491 /* 1492 * DLIOCRAW 1493 */ 1494 static void 1495 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1496 { 1497 queue_t *q = dsp->ds_wq; 1498 1499 rw_enter(&dsp->ds_lock, RW_WRITER); 1500 if (dsp->ds_polling || dsp->ds_soft_ring) { 1501 rw_exit(&dsp->ds_lock); 1502 miocnak(q, mp, 0, EPROTO); 1503 return; 1504 } 1505 1506 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 1507 /* 1508 * Set the receive callback. 1509 */ 1510 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); 1511 1512 /* 1513 * Note that raw mode is enabled. 1514 */ 1515 dsp->ds_mode = DLD_RAW; 1516 } 1517 1518 rw_exit(&dsp->ds_lock); 1519 miocack(q, mp, 0, 0); 1520 } 1521 1522 /* 1523 * DLIOCHDRINFO 1524 */ 1525 static void 1526 ioc_fast(dld_str_t *dsp, mblk_t *mp) 1527 { 1528 dl_unitdata_req_t *dlp; 1529 off_t off; 1530 size_t len; 1531 const uint8_t *addr; 1532 uint16_t sap; 1533 mblk_t *nmp; 1534 mblk_t *hmp; 1535 uint_t addr_length; 1536 queue_t *q = dsp->ds_wq; 1537 int err; 1538 dls_channel_t dc; 1539 1540 if (dld_opt & DLD_OPT_NO_FASTPATH) { 1541 err = ENOTSUP; 1542 goto failed; 1543 } 1544 1545 nmp = mp->b_cont; 1546 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 1547 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 1548 dlp->dl_primitive != DL_UNITDATA_REQ)) { 1549 err = EINVAL; 1550 goto failed; 1551 } 1552 1553 off = dlp->dl_dest_addr_offset; 1554 len = dlp->dl_dest_addr_length; 1555 1556 if (!MBLKIN(nmp, off, len)) { 1557 err = EINVAL; 1558 goto failed; 1559 } 1560 1561 rw_enter(&dsp->ds_lock, RW_READER); 1562 if (dsp->ds_dlstate != DL_IDLE) { 1563 rw_exit(&dsp->ds_lock); 1564 err = ENOTSUP; 1565 goto 
failed; 1566 } 1567 1568 addr_length = dsp->ds_mip->mi_addr_length; 1569 if (len != addr_length + sizeof (uint16_t)) { 1570 rw_exit(&dsp->ds_lock); 1571 err = EINVAL; 1572 goto failed; 1573 } 1574 1575 addr = nmp->b_rptr + off; 1576 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 1577 dc = dsp->ds_dc; 1578 1579 if ((hmp = dls_header(dc, addr, sap, dsp->ds_pri)) == NULL) { 1580 rw_exit(&dsp->ds_lock); 1581 err = ENOMEM; 1582 goto failed; 1583 } 1584 1585 /* 1586 * This is a performance optimization. We originally entered 1587 * as reader and only become writer upon transitioning into 1588 * the DLD_FASTPATH mode for the first time. Otherwise we 1589 * stay as reader and return the fast-path header to IP. 1590 */ 1591 if (dsp->ds_mode != DLD_FASTPATH) { 1592 if (!rw_tryupgrade(&dsp->ds_lock)) { 1593 rw_exit(&dsp->ds_lock); 1594 rw_enter(&dsp->ds_lock, RW_WRITER); 1595 1596 /* 1597 * State may have changed before we re-acquired 1598 * the writer lock in case the upgrade failed. 1599 */ 1600 if (dsp->ds_dlstate != DL_IDLE) { 1601 rw_exit(&dsp->ds_lock); 1602 err = ENOTSUP; 1603 goto failed; 1604 } 1605 } 1606 1607 /* 1608 * Set the receive callback (unless polling is enabled). 1609 */ 1610 if (!dsp->ds_polling && !dsp->ds_soft_ring) 1611 dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); 1612 1613 /* 1614 * Note that fast-path mode is enabled. 1615 */ 1616 dsp->ds_mode = DLD_FASTPATH; 1617 } 1618 rw_exit(&dsp->ds_lock); 1619 1620 freemsg(nmp->b_cont); 1621 nmp->b_cont = hmp; 1622 1623 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 1624 return; 1625 failed: 1626 miocnak(q, mp, 0, err); 1627 } 1628 1629 /* 1630 * Catch-all handler. 
1631 */ 1632 static void 1633 ioc(dld_str_t *dsp, mblk_t *mp) 1634 { 1635 queue_t *q = dsp->ds_wq; 1636 mac_handle_t mh; 1637 1638 rw_enter(&dsp->ds_lock, RW_READER); 1639 if (dsp->ds_dlstate == DL_UNATTACHED) { 1640 rw_exit(&dsp->ds_lock); 1641 miocnak(q, mp, 0, EINVAL); 1642 return; 1643 } 1644 mh = dsp->ds_mh; 1645 ASSERT(mh != NULL); 1646 rw_exit(&dsp->ds_lock); 1647 mac_ioctl(mh, q, mp); 1648 } 1649 1650 /* 1651 * Allocate a new minor number. 1652 */ 1653 static minor_t 1654 dld_minor_hold(boolean_t sleep) 1655 { 1656 minor_t minor; 1657 1658 /* 1659 * Grab a value from the arena. 1660 */ 1661 atomic_add_32(&minor_count, 1); 1662 if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, 1663 (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) { 1664 atomic_add_32(&minor_count, -1); 1665 return (0); 1666 } 1667 1668 return (minor); 1669 } 1670 1671 /* 1672 * Release a previously allocated minor number. 1673 */ 1674 static void 1675 dld_minor_rele(minor_t minor) 1676 { 1677 /* 1678 * Return the value to the arena. 1679 */ 1680 vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); 1681 1682 atomic_add_32(&minor_count, -1); 1683 } 1684