1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Data-Link Driver 31 */ 32 33 #include <sys/stropts.h> 34 #include <sys/strsun.h> 35 #include <sys/strsubr.h> 36 #include <sys/atomic.h> 37 #include <sys/mkdev.h> 38 #include <sys/vlan.h> 39 #include <sys/dld.h> 40 #include <sys/dld_impl.h> 41 #include <sys/dls_impl.h> 42 #include <inet/common.h> 43 44 static int str_constructor(void *, void *, int); 45 static void str_destructor(void *, void *); 46 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *); 47 static void str_notify_promisc_on_phys(dld_str_t *); 48 static void str_notify_promisc_off_phys(dld_str_t *); 49 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 50 static void str_notify_link_up(dld_str_t *); 51 static void str_notify_link_down(dld_str_t *); 52 static void str_notify_capab_reneg(dld_str_t *); 53 static void str_notify_speed(dld_str_t *, uint32_t); 54 static void str_notify(void *, mac_notify_type_t); 55 56 static void ioc_raw(dld_str_t *, mblk_t *); 57 static void ioc_fast(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static minor_t dld_minor_hold(boolean_t); 61 static void dld_minor_rele(minor_t); 62 63 static uint32_t str_count; 64 static kmem_cache_t *str_cachep; 65 static vmem_t *minor_arenap; 66 static uint32_t minor_count; 67 68 #define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) 69 #define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) 70 71 /* 72 * Some notes on entry points, flow-control, queueing and locking: 73 * 74 * This driver exports the traditional STREAMS put entry point as well as 75 * the non-STREAMS fast-path transmit routine which is provided to IP via 76 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 77 * and data operations, while the fast-path routine deals only with M_DATA 78 * fast-path packets. Regardless of the entry point, all outbound packets 79 * will end up in str_mdata_fastpath_put(), where they will be delivered to 80 * the MAC driver. 81 * 82 * The transmit logic operates in two modes: a "not busy" mode where the 83 * packets will be delivered to the MAC for a send attempt, or "busy" mode 84 * where they will be enqueued in the internal queue because of flow-control. 85 * Flow-control happens when the MAC driver indicates the packets couldn't 86 * be transmitted due to lack of resources (e.g. running out of descriptors). 87 * In such case, the driver will place a dummy message on its write-side 88 * STREAMS queue so that the queue is marked as "full". Any subsequent 89 * packets arriving at the driver will be enqueued in the internal queue, 90 * which is drained in the context of the service thread that gets scheduled 91 * whenever the driver is in the "busy" mode. When all packets have been 92 * successfully delivered by MAC and the internal queue is empty, it will 93 * transition to the "not busy" mode by removing the dummy message from the 94 * write-side STREAMS queue; in effect this will trigger backenabling. 95 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due 96 * to the above reasons. 97 * 98 * The driver implements an internal transmit queue independent of STREAMS. 99 * This allows for flexibility and provides a fast enqueue/dequeue mechanism 100 * compared to the putq() and get() STREAMS interfaces. The only putq() and 101 * getq() operations done by the driver are those related to placing and 102 * removing the dummy message to/from the write-side STREAMS queue for flow- 103 * control purposes. 104 * 105 * Locking is done independent of STREAMS due to the driver being fully MT. 106 * Threads entering the driver (either from put or service entry points) 107 * will most likely be readers, with the exception of a few writer cases 108 * such those handling DLPI attach/detach/bind/unbind/etc. or any of the 109 * DLD-related ioctl requests. The DLPI detach case is special, because 110 * it involves freeing resources and therefore must be single-threaded. 111 * Unfortunately the readers/writers lock can't be used to protect against 112 * it, because the lock is dropped prior to the driver calling places where 113 * putnext() may be invoked, and such places may depend on those resources 114 * to exist. Because of this, the driver always completes the DLPI detach 115 * process when there are no other threads running in the driver. This is 116 * done by keeping track of the number of threads, such that the the last 117 * thread leaving the driver will finish the pending DLPI detach operation. 118 */ 119 120 /* 121 * dld_max_q_count is the queue depth threshold used to limit the number of 122 * outstanding packets or bytes allowed in the queue; once this limit is 123 * reached the driver will free any incoming ones until the queue depth 124 * drops below the threshold. 125 * 126 * This buffering is provided to accomodate clients which do not employ 127 * their own buffering scheme, and to handle occasional packet bursts. 128 * Clients which handle their own buffering will receive positive feedback 129 * from this driver as soon as it transitions into the "busy" state, i.e. 130 * when the queue is initially filled up; they will get backenabled once 131 * the queue is empty. 132 * 133 * The value chosen here is rather arbitrary; in future some intelligent 134 * heuristics may be involved which could take into account the hardware's 135 * transmit ring size, etc. 136 */ 137 uint_t dld_max_q_count = (16 * 1024 *1024); 138 139 static dev_info_t * 140 dld_finddevinfo(dev_t dev) 141 { 142 minor_t minor = getminor(dev); 143 char *drvname = ddi_major_to_name(getmajor(dev)); 144 char name[MAXNAMELEN]; 145 dls_vlan_t *dvp = NULL; 146 dev_info_t *dip = NULL; 147 148 if (drvname == NULL || minor == 0 || minor > DLD_MAX_PPA + 1) 149 return (NULL); 150 151 (void) snprintf(name, MAXNAMELEN, "%s%d", drvname, (int)minor - 1); 152 if (dls_vlan_hold(name, &dvp, B_FALSE) != 0) 153 return (NULL); 154 155 dip = mac_devinfo_get(dvp->dv_dlp->dl_mh); 156 dls_vlan_rele(dvp); 157 return (dip); 158 } 159 160 /* 161 * devo_getinfo: getinfo(9e) 162 */ 163 /*ARGSUSED*/ 164 int 165 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 166 { 167 dev_info_t *devinfo; 168 minor_t minor = getminor((dev_t)arg); 169 int rc = DDI_FAILURE; 170 171 switch (cmd) { 172 case DDI_INFO_DEVT2DEVINFO: 173 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 174 *(dev_info_t **)resp = devinfo; 175 rc = DDI_SUCCESS; 176 } 177 break; 178 case DDI_INFO_DEVT2INSTANCE: 179 if (minor > 0 && minor <= DLD_MAX_PPA + 1) { 180 *(int *)resp = (int)minor - 1; 181 rc = DDI_SUCCESS; 182 } 183 break; 184 } 185 return (rc); 186 } 187 188 /* 189 * qi_qopen: open(9e) 190 */ 191 /*ARGSUSED*/ 192 int 193 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 194 { 195 dld_str_t *dsp; 196 major_t major; 197 minor_t minor; 198 int err; 199 200 if (sflag == MODOPEN) 201 return (ENOTSUP); 202 203 /* 204 * This is a cloning driver and therefore each queue should only 205 * ever get opened once. 206 */ 207 if (rq->q_ptr != NULL) 208 return (EBUSY); 209 210 major = getmajor(*devp); 211 minor = getminor(*devp); 212 if (minor > DLD_MAX_MINOR) 213 return (ENODEV); 214 215 /* 216 * Create a new dld_str_t for the stream. This will grab a new minor 217 * number that will be handed back in the cloned dev_t. Creation may 218 * fail if we can't allocate the dummy mblk used for flow-control. 219 */ 220 dsp = dld_str_create(rq, DLD_DLPI, major, 221 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 222 if (dsp == NULL) 223 return (ENOSR); 224 225 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 226 if (minor != 0) { 227 /* 228 * Style 1 open 229 */ 230 231 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 232 goto failed; 233 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 234 } else { 235 (void) qassociate(rq, -1); 236 } 237 238 /* 239 * Enable the queue srv(9e) routine. 240 */ 241 qprocson(rq); 242 243 /* 244 * Construct a cloned dev_t to hand back. 245 */ 246 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 247 return (0); 248 249 failed: 250 dld_str_destroy(dsp); 251 return (err); 252 } 253 254 /* 255 * qi_qclose: close(9e) 256 */ 257 int 258 dld_close(queue_t *rq) 259 { 260 dld_str_t *dsp = rq->q_ptr; 261 262 /* 263 * Disable the queue srv(9e) routine. 264 */ 265 qprocsoff(rq); 266 267 /* 268 * At this point we can not be entered by any threads via STREAMS 269 * or the direct call interface, which is available only to IP. 270 * After the interface is unplumbed, IP wouldn't have any reference 271 * to this instance, and therefore we are now effectively single 272 * threaded and don't require any lock protection. Flush all 273 * pending packets which are sitting in the transmit queue. 274 */ 275 ASSERT(dsp->ds_thr == 0); 276 dld_tx_flush(dsp); 277 278 /* 279 * This stream was open to a provider node. Check to see 280 * if it has been cleanly shut down. 281 */ 282 if (dsp->ds_dlstate != DL_UNATTACHED) { 283 /* 284 * The stream is either open to a style 1 provider or 285 * this is not clean shutdown. Detach from the PPA. 286 * (This is still ok even in the style 1 case). 287 */ 288 dld_str_detach(dsp); 289 } 290 291 dld_str_destroy(dsp); 292 return (0); 293 } 294 295 /* 296 * qi_qputp: put(9e) 297 */ 298 void 299 dld_wput(queue_t *wq, mblk_t *mp) 300 { 301 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 302 303 DLD_ENTER(dsp); 304 305 switch (DB_TYPE(mp)) { 306 case M_DATA: 307 rw_enter(&dsp->ds_lock, RW_READER); 308 if (dsp->ds_dlstate != DL_IDLE || 309 dsp->ds_mode == DLD_UNITDATA) { 310 freemsg(mp); 311 } else if (dsp->ds_mode == DLD_FASTPATH) { 312 str_mdata_fastpath_put(dsp, mp); 313 } else if (dsp->ds_mode == DLD_RAW) { 314 str_mdata_raw_put(dsp, mp); 315 } 316 rw_exit(&dsp->ds_lock); 317 break; 318 case M_PROTO: 319 case M_PCPROTO: 320 dld_proto(dsp, mp); 321 break; 322 case M_IOCTL: 323 dld_ioc(dsp, mp); 324 break; 325 case M_FLUSH: 326 if (*mp->b_rptr & FLUSHW) { 327 dld_tx_flush(dsp); 328 *mp->b_rptr &= ~FLUSHW; 329 } 330 331 if (*mp->b_rptr & FLUSHR) { 332 qreply(wq, mp); 333 } else { 334 freemsg(mp); 335 } 336 break; 337 default: 338 freemsg(mp); 339 break; 340 } 341 342 DLD_EXIT(dsp); 343 } 344 345 /* 346 * qi_srvp: srv(9e) 347 */ 348 void 349 dld_wsrv(queue_t *wq) 350 { 351 mblk_t *mp; 352 dld_str_t *dsp = wq->q_ptr; 353 354 DLD_ENTER(dsp); 355 rw_enter(&dsp->ds_lock, RW_READER); 356 /* 357 * Grab all packets (chained via b_next) off our transmit queue 358 * and try to send them all to the MAC layer. Since the queue 359 * is independent of streams, we are able to dequeue all messages 360 * at once without looping through getq() and manually chaining 361 * them. Note that the queue size parameters (byte and message 362 * counts) are cleared as well, but we postpone the backenabling 363 * until after the MAC transmit since some packets may end up 364 * back at our transmit queue. 365 */ 366 mutex_enter(&dsp->ds_tx_list_lock); 367 if ((mp = dsp->ds_tx_list_head) == NULL) { 368 ASSERT(!dsp->ds_tx_qbusy); 369 ASSERT(dsp->ds_tx_flow_mp != NULL); 370 ASSERT(dsp->ds_tx_list_head == NULL); 371 ASSERT(dsp->ds_tx_list_tail == NULL); 372 ASSERT(dsp->ds_tx_cnt == 0); 373 ASSERT(dsp->ds_tx_msgcnt == 0); 374 mutex_exit(&dsp->ds_tx_list_lock); 375 goto done; 376 } 377 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 378 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 379 mutex_exit(&dsp->ds_tx_list_lock); 380 381 /* 382 * Discard packets unless we are attached and bound; note that 383 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 384 * because regardless of the mode all transmit will end up in 385 * str_mdata_fastpath_put() where the packets may be queued. 386 */ 387 ASSERT(DB_TYPE(mp) == M_DATA); 388 if (dsp->ds_dlstate != DL_IDLE) { 389 freemsgchain(mp); 390 goto done; 391 } 392 393 /* 394 * Attempt to transmit one or more packets. If the MAC can't 395 * send them all, re-queue the packet(s) at the beginning of 396 * the transmit queue to avoid any re-ordering. 397 */ 398 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 399 dld_tx_enqueue(dsp, mp, B_TRUE); 400 401 /* 402 * Grab the list lock again and check if the transmit queue is 403 * really empty; if so, lift up flow-control and backenable any 404 * writer queues. If the queue is not empty, schedule service 405 * thread to drain it. 406 */ 407 mutex_enter(&dsp->ds_tx_list_lock); 408 if (dsp->ds_tx_list_head == NULL) { 409 dsp->ds_tx_flow_mp = getq(wq); 410 ASSERT(dsp->ds_tx_flow_mp != NULL); 411 dsp->ds_tx_qbusy = B_FALSE; 412 } 413 mutex_exit(&dsp->ds_tx_list_lock); 414 done: 415 rw_exit(&dsp->ds_lock); 416 DLD_EXIT(dsp); 417 } 418 419 void 420 dld_init_ops(struct dev_ops *ops, const char *name) 421 { 422 struct streamtab *stream; 423 struct qinit *rq, *wq; 424 struct module_info *modinfo; 425 426 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 427 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 428 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 429 modinfo->mi_minpsz = 0; 430 modinfo->mi_maxpsz = 64*1024; 431 modinfo->mi_hiwat = 1; 432 modinfo->mi_lowat = 0; 433 434 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 435 rq->qi_qopen = dld_open; 436 rq->qi_qclose = dld_close; 437 rq->qi_minfo = modinfo; 438 439 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 440 wq->qi_putp = (pfi_t)dld_wput; 441 wq->qi_srvp = (pfi_t)dld_wsrv; 442 wq->qi_minfo = modinfo; 443 444 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 445 stream->st_rdinit = rq; 446 stream->st_wrinit = wq; 447 ops->devo_cb_ops->cb_str = stream; 448 449 ops->devo_getinfo = &dld_getinfo; 450 } 451 452 void 453 dld_fini_ops(struct dev_ops *ops) 454 { 455 struct streamtab *stream; 456 struct qinit *rq, *wq; 457 struct module_info *modinfo; 458 459 stream = ops->devo_cb_ops->cb_str; 460 rq = stream->st_rdinit; 461 wq = stream->st_wrinit; 462 modinfo = rq->qi_minfo; 463 ASSERT(wq->qi_minfo == modinfo); 464 465 kmem_free(stream, sizeof (struct streamtab)); 466 kmem_free(wq, sizeof (struct qinit)); 467 kmem_free(rq, sizeof (struct qinit)); 468 kmem_free(modinfo->mi_idname, FMNAMESZ); 469 kmem_free(modinfo, sizeof (struct module_info)); 470 } 471 472 /* 473 * Initialize this module's data structures. 474 */ 475 void 476 dld_str_init(void) 477 { 478 /* 479 * Create dld_str_t object cache. 480 */ 481 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 482 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 483 ASSERT(str_cachep != NULL); 484 485 /* 486 * Allocate a vmem arena to manage minor numbers. The range of the 487 * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal 488 * minor number). 489 */ 490 minor_arenap = vmem_create("dld_minor_arena", 491 MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, 492 VM_SLEEP | VMC_IDENTIFIER); 493 ASSERT(minor_arenap != NULL); 494 } 495 496 /* 497 * Tear down this module's data structures. 498 */ 499 int 500 dld_str_fini(void) 501 { 502 /* 503 * Make sure that there are no objects in use. 504 */ 505 if (str_count != 0) 506 return (EBUSY); 507 508 /* 509 * Check to see if there are any minor numbers still in use. 510 */ 511 if (minor_count != 0) 512 return (EBUSY); 513 514 /* 515 * Destroy object cache. 516 */ 517 kmem_cache_destroy(str_cachep); 518 vmem_destroy(minor_arenap); 519 return (0); 520 } 521 522 /* 523 * Create a new dld_str_t object. 524 */ 525 dld_str_t * 526 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 527 { 528 dld_str_t *dsp; 529 530 /* 531 * Allocate an object from the cache. 532 */ 533 atomic_add_32(&str_count, 1); 534 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 535 536 /* 537 * Allocate the dummy mblk for flow-control. 538 */ 539 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 540 if (dsp->ds_tx_flow_mp == NULL) { 541 kmem_cache_free(str_cachep, dsp); 542 atomic_add_32(&str_count, -1); 543 return (NULL); 544 } 545 dsp->ds_type = type; 546 dsp->ds_major = major; 547 dsp->ds_style = style; 548 549 /* 550 * Initialize the queue pointers. 551 */ 552 ASSERT(RD(rq) == rq); 553 dsp->ds_rq = rq; 554 dsp->ds_wq = WR(rq); 555 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 556 557 /* 558 * We want explicit control over our write-side STREAMS queue 559 * where the dummy mblk gets added/removed for flow-control. 560 */ 561 noenable(WR(rq)); 562 563 return (dsp); 564 } 565 566 /* 567 * Destroy a dld_str_t object. 568 */ 569 void 570 dld_str_destroy(dld_str_t *dsp) 571 { 572 queue_t *rq; 573 queue_t *wq; 574 575 /* 576 * Clear the queue pointers. 577 */ 578 rq = dsp->ds_rq; 579 wq = dsp->ds_wq; 580 ASSERT(wq == WR(rq)); 581 582 rq->q_ptr = wq->q_ptr = NULL; 583 dsp->ds_rq = dsp->ds_wq = NULL; 584 585 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 586 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 587 ASSERT(dsp->ds_tx_list_head == NULL); 588 ASSERT(dsp->ds_tx_list_tail == NULL); 589 ASSERT(dsp->ds_tx_cnt == 0); 590 ASSERT(dsp->ds_tx_msgcnt == 0); 591 ASSERT(!dsp->ds_tx_qbusy); 592 593 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 594 ASSERT(dsp->ds_thr == 0); 595 ASSERT(dsp->ds_detach_req == NULL); 596 597 /* 598 * Reinitialize all the flags. 599 */ 600 dsp->ds_notifications = 0; 601 dsp->ds_passivestate = DLD_UNINITIALIZED; 602 dsp->ds_mode = DLD_UNITDATA; 603 604 /* 605 * Free the dummy mblk if exists. 606 */ 607 if (dsp->ds_tx_flow_mp != NULL) { 608 freeb(dsp->ds_tx_flow_mp); 609 dsp->ds_tx_flow_mp = NULL; 610 } 611 /* 612 * Free the object back to the cache. 613 */ 614 kmem_cache_free(str_cachep, dsp); 615 atomic_add_32(&str_count, -1); 616 } 617 618 /* 619 * kmem_cache contructor function: see kmem_cache_create(9f). 620 */ 621 /*ARGSUSED*/ 622 static int 623 str_constructor(void *buf, void *cdrarg, int kmflags) 624 { 625 dld_str_t *dsp = buf; 626 627 bzero(buf, sizeof (dld_str_t)); 628 629 /* 630 * Allocate a new minor number. 631 */ 632 if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0) 633 return (-1); 634 635 /* 636 * Initialize the DLPI state machine. 637 */ 638 dsp->ds_dlstate = DL_UNATTACHED; 639 640 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 641 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 642 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 643 644 return (0); 645 } 646 647 /* 648 * kmem_cache destructor function. 649 */ 650 /*ARGSUSED*/ 651 static void 652 str_destructor(void *buf, void *cdrarg) 653 { 654 dld_str_t *dsp = buf; 655 656 /* 657 * Make sure the DLPI state machine was reset. 658 */ 659 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 660 661 /* 662 * Make sure the data-link interface was closed. 663 */ 664 ASSERT(dsp->ds_mh == NULL); 665 ASSERT(dsp->ds_dc == NULL); 666 667 /* 668 * Make sure enabled notifications are cleared. 669 */ 670 ASSERT(dsp->ds_notifications == 0); 671 672 /* 673 * Make sure polling is disabled. 674 */ 675 ASSERT(!dsp->ds_polling); 676 677 /* 678 * Release the minor number. 679 */ 680 dld_minor_rele(dsp->ds_minor); 681 682 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 683 rw_destroy(&dsp->ds_lock); 684 685 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 686 mutex_destroy(&dsp->ds_tx_list_lock); 687 ASSERT(dsp->ds_tx_flow_mp == NULL); 688 689 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 690 mutex_destroy(&dsp->ds_thr_lock); 691 ASSERT(dsp->ds_detach_req == NULL); 692 } 693 694 /* 695 * M_DATA put (IP fast-path mode) 696 */ 697 void 698 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 699 { 700 /* 701 * This function can be called from within dld or from an upper 702 * layer protocol (currently only tcp). If we are in the busy 703 * mode enqueue the packet(s) and return. Otherwise hand them 704 * over to the MAC driver for transmission; any remaining one(s) 705 * which didn't get sent will be queued. 706 * 707 * Note here that we don't grab the list lock prior to checking 708 * the busy flag. This is okay, because a missed transition 709 * will not cause any packet reordering for any particular TCP 710 * connection (which is single-threaded). The enqueue routine 711 * will atomically set the busy flag and schedule the service 712 * thread to run; the flag is only cleared by the service thread 713 * when there is no more packet to be transmitted. 714 */ 715 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 716 dld_tx_enqueue(dsp, mp, B_FALSE); 717 } 718 719 /* 720 * M_DATA put (raw mode) 721 */ 722 void 723 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 724 { 725 struct ether_header *ehp; 726 mblk_t *bp; 727 size_t size; 728 size_t hdrlen; 729 730 size = MBLKL(mp); 731 if (size < sizeof (struct ether_header)) 732 goto discard; 733 734 hdrlen = sizeof (struct ether_header); 735 736 ehp = (struct ether_header *)mp->b_rptr; 737 if (ntohs(ehp->ether_type) == VLAN_TPID) { 738 struct ether_vlan_header *evhp; 739 740 if (size < sizeof (struct ether_vlan_header)) 741 goto discard; 742 743 /* 744 * Replace vtag with our own 745 */ 746 evhp = (struct ether_vlan_header *)ehp; 747 evhp->ether_tci = htons(VLAN_TCI(dsp->ds_pri, 748 ETHER_CFI, dsp->ds_vid)); 749 hdrlen = sizeof (struct ether_vlan_header); 750 } 751 752 /* 753 * Check the packet is not too big and that any remaining 754 * fragment list is composed entirely of M_DATA messages. (We 755 * know the first fragment was M_DATA otherwise we could not 756 * have got here). 757 */ 758 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 759 if (DB_TYPE(bp) != M_DATA) 760 goto discard; 761 size += MBLKL(bp); 762 } 763 764 if (size > dsp->ds_mip->mi_sdu_max + hdrlen) 765 goto discard; 766 767 str_mdata_fastpath_put(dsp, mp); 768 return; 769 770 discard: 771 freemsg(mp); 772 } 773 774 /* 775 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 776 */ 777 int 778 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 779 { 780 int err; 781 const char *drvname; 782 char name[MAXNAMELEN]; 783 dls_channel_t dc; 784 uint_t addr_length; 785 786 ASSERT(dsp->ds_dc == NULL); 787 788 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 789 return (EINVAL); 790 791 (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); 792 793 if (strcmp(drvname, "aggr") != 0 && 794 qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 795 return (EINVAL); 796 797 /* 798 * Open a channel. 799 */ 800 if ((err = dls_open(name, &dc)) != 0) { 801 (void) qassociate(dsp->ds_wq, -1); 802 return (err); 803 } 804 805 /* 806 * Cache the MAC interface handle, a pointer to the immutable MAC 807 * information and the current and 'factory' MAC address. 808 */ 809 dsp->ds_mh = dls_mac(dc); 810 dsp->ds_mip = mac_info(dsp->ds_mh); 811 812 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 813 814 addr_length = dsp->ds_mip->mi_addr_length; 815 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 816 817 /* 818 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 819 * a non-VLAN interface). 820 */ 821 dsp->ds_vid = dls_vid(dc); 822 823 /* 824 * Set the default packet priority. 825 */ 826 dsp->ds_pri = 0; 827 828 /* 829 * Add a notify function so that the we get updates from the MAC. 830 */ 831 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 832 833 dsp->ds_dc = dc; 834 dsp->ds_dlstate = DL_UNBOUND; 835 836 return (0); 837 } 838 839 /* 840 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 841 * from close(2) for style 2. 842 */ 843 void 844 dld_str_detach(dld_str_t *dsp) 845 { 846 ASSERT(dsp->ds_thr == 0); 847 848 /* 849 * Remove the notify function. 850 */ 851 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 852 853 /* 854 * Re-initialize the DLPI state machine. 855 */ 856 dsp->ds_dlstate = DL_UNATTACHED; 857 858 /* 859 * Clear the polling and promisc flags. 860 */ 861 dsp->ds_polling = B_FALSE; 862 dsp->ds_promisc = 0; 863 864 /* 865 * Close the channel. 866 */ 867 dls_close(dsp->ds_dc); 868 dsp->ds_dc = NULL; 869 dsp->ds_mh = NULL; 870 871 (void) qassociate(dsp->ds_wq, -1); 872 } 873 874 /* 875 * Raw mode receive function. 876 */ 877 /*ARGSUSED*/ 878 void 879 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 880 size_t header_length) 881 { 882 dld_str_t *dsp = (dld_str_t *)arg; 883 mblk_t *next; 884 885 ASSERT(mp != NULL); 886 do { 887 /* 888 * Get the pointer to the next packet in the chain and then 889 * clear b_next before the packet gets passed on. 890 */ 891 next = mp->b_next; 892 mp->b_next = NULL; 893 894 /* 895 * Wind back b_rptr to point at the MAC header. 896 */ 897 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 898 mp->b_rptr -= header_length; 899 if (header_length == sizeof (struct ether_vlan_header)) { 900 /* 901 * Strip off the vtag 902 */ 903 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 904 2 * ETHERADDRL); 905 mp->b_rptr += VLAN_TAGSZ; 906 } 907 908 /* 909 * Pass the packet on. 910 */ 911 putnext(dsp->ds_rq, mp); 912 913 /* 914 * Move on to the next packet in the chain. 915 */ 916 mp = next; 917 } while (mp != NULL); 918 } 919 920 /* 921 * Fast-path receive function. 922 */ 923 /*ARGSUSED*/ 924 void 925 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 926 size_t header_length) 927 { 928 dld_str_t *dsp = (dld_str_t *)arg; 929 mblk_t *next; 930 931 ASSERT(mp != NULL); 932 do { 933 /* 934 * Get the pointer to the next packet in the chain and then 935 * clear b_next before the packet gets passed on. 936 */ 937 next = mp->b_next; 938 mp->b_next = NULL; 939 940 /* 941 * Pass the packet on. 942 */ 943 putnext(dsp->ds_rq, mp); 944 945 /* 946 * Move on to the next packet in the chain. 947 */ 948 mp = next; 949 } while (mp != NULL); 950 } 951 952 /* 953 * Default receive function (send DL_UNITDATA_IND messages). 954 */ 955 /*ARGSUSED*/ 956 void 957 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 958 size_t header_length) 959 { 960 dld_str_t *dsp = (dld_str_t *)arg; 961 mblk_t *ud_mp; 962 mblk_t *next; 963 964 ASSERT(mp != NULL); 965 do { 966 /* 967 * Get the pointer to the next packet in the chain and then 968 * clear b_next before the packet gets passed on. 969 */ 970 next = mp->b_next; 971 mp->b_next = NULL; 972 973 /* 974 * Wind back b_rptr to point at the MAC header. 975 */ 976 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 977 mp->b_rptr -= header_length; 978 979 /* 980 * Create the DL_UNITDATA_IND M_PROTO. 981 */ 982 if ((ud_mp = str_unitdata_ind(dsp, mp)) == NULL) { 983 freemsgchain(mp); 984 return; 985 } 986 987 /* 988 * Advance b_rptr to point at the payload again. 989 */ 990 mp->b_rptr += header_length; 991 992 /* 993 * Prepend the DL_UNITDATA_IND. 994 */ 995 ud_mp->b_cont = mp; 996 997 /* 998 * Send the message. 999 */ 1000 putnext(dsp->ds_rq, ud_mp); 1001 1002 /* 1003 * Move on to the next packet in the chain. 1004 */ 1005 mp = next; 1006 } while (mp != NULL); 1007 } 1008 1009 /* 1010 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1011 * current state of the interface. 1012 */ 1013 void 1014 dld_str_notify_ind(dld_str_t *dsp) 1015 { 1016 mac_notify_type_t type; 1017 1018 for (type = 0; type < MAC_NNOTE; type++) 1019 str_notify(dsp, type); 1020 } 1021 1022 typedef struct dl_unitdata_ind_wrapper { 1023 dl_unitdata_ind_t dl_unitdata; 1024 uint8_t dl_dest_addr[MAXADDRLEN + sizeof (uint16_t)]; 1025 uint8_t dl_src_addr[MAXADDRLEN + sizeof (uint16_t)]; 1026 } dl_unitdata_ind_wrapper_t; 1027 1028 /* 1029 * Create a DL_UNITDATA_IND M_PROTO message. 1030 */ 1031 static mblk_t * 1032 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp) 1033 { 1034 mblk_t *nmp; 1035 dl_unitdata_ind_wrapper_t *dlwp; 1036 dl_unitdata_ind_t *dlp; 1037 dls_header_info_t dhi; 1038 uint_t addr_length; 1039 uint8_t *daddr; 1040 uint8_t *saddr; 1041 1042 /* 1043 * Get the packet header information. 1044 */ 1045 dls_header_info(dsp->ds_dc, mp, &dhi); 1046 1047 /* 1048 * Allocate a message large enough to contain the wrapper structure 1049 * defined above. 1050 */ 1051 if ((nmp = mexchange(dsp->ds_wq, NULL, 1052 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1053 DL_UNITDATA_IND)) == NULL) 1054 return (NULL); 1055 1056 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1057 1058 dlp = &(dlwp->dl_unitdata); 1059 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1060 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1061 1062 /* 1063 * Copy in the destination address. 1064 */ 1065 addr_length = dsp->ds_mip->mi_addr_length; 1066 daddr = dlwp->dl_dest_addr; 1067 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1068 bcopy(dhi.dhi_daddr, daddr, addr_length); 1069 1070 /* 1071 * Set the destination DLSAP to our bound DLSAP value. 1072 */ 1073 *(uint16_t *)(daddr + addr_length) = dsp->ds_sap; 1074 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1075 1076 /* 1077 * If the destination address was a group address then 1078 * dl_group_address field should be non-zero. 1079 */ 1080 dlp->dl_group_address = dhi.dhi_isgroup; 1081 1082 /* 1083 * Copy in the source address. 1084 */ 1085 saddr = dlwp->dl_src_addr; 1086 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1087 bcopy(dhi.dhi_saddr, saddr, addr_length); 1088 1089 /* 1090 * Set the source DLSAP to the packet ethertype. 1091 */ 1092 *(uint16_t *)(saddr + addr_length) = dhi.dhi_ethertype; 1093 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1094 1095 return (nmp); 1096 } 1097 1098 /* 1099 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1100 */ 1101 static void 1102 str_notify_promisc_on_phys(dld_str_t *dsp) 1103 { 1104 mblk_t *mp; 1105 dl_notify_ind_t *dlip; 1106 1107 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1108 return; 1109 1110 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1111 M_PROTO, 0)) == NULL) 1112 return; 1113 1114 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1115 dlip = (dl_notify_ind_t *)mp->b_rptr; 1116 dlip->dl_primitive = DL_NOTIFY_IND; 1117 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1118 1119 qreply(dsp->ds_wq, mp); 1120 } 1121 1122 /* 1123 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1124 */ 1125 static void 1126 str_notify_promisc_off_phys(dld_str_t *dsp) 1127 { 1128 mblk_t *mp; 1129 dl_notify_ind_t *dlip; 1130 1131 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1132 return; 1133 1134 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1135 M_PROTO, 0)) == NULL) 1136 return; 1137 1138 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1139 dlip = (dl_notify_ind_t *)mp->b_rptr; 1140 dlip->dl_primitive = DL_NOTIFY_IND; 1141 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1142 1143 qreply(dsp->ds_wq, mp); 1144 } 1145 1146 /* 1147 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1148 */ 1149 static void 1150 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1151 { 1152 mblk_t *mp; 1153 dl_notify_ind_t *dlip; 1154 uint_t addr_length; 1155 uint16_t ethertype; 1156 1157 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1158 return; 1159 1160 addr_length = dsp->ds_mip->mi_addr_length; 1161 if ((mp = mexchange(dsp->ds_wq, NULL, 1162 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1163 M_PROTO, 0)) == NULL) 1164 return; 1165 1166 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1167 dlip = (dl_notify_ind_t *)mp->b_rptr; 1168 dlip->dl_primitive = DL_NOTIFY_IND; 1169 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1170 dlip->dl_data = DL_CURR_PHYS_ADDR; 1171 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1172 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1173 1174 bcopy(addr, &dlip[1], addr_length); 1175 1176 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1177 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = 1178 ethertype; 1179 1180 qreply(dsp->ds_wq, mp); 1181 } 1182 1183 /* 1184 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1185 */ 1186 static void 1187 str_notify_link_up(dld_str_t *dsp) 1188 { 1189 mblk_t *mp; 1190 dl_notify_ind_t *dlip; 1191 1192 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1193 return; 1194 1195 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1196 M_PROTO, 0)) == NULL) 1197 return; 1198 1199 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1200 dlip = (dl_notify_ind_t *)mp->b_rptr; 1201 dlip->dl_primitive = DL_NOTIFY_IND; 1202 dlip->dl_notification = DL_NOTE_LINK_UP; 1203 1204 qreply(dsp->ds_wq, mp); 1205 } 1206 1207 /* 1208 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1209 */ 1210 static void 1211 str_notify_link_down(dld_str_t *dsp) 1212 { 1213 mblk_t *mp; 1214 dl_notify_ind_t *dlip; 1215 1216 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1217 return; 1218 1219 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1220 M_PROTO, 0)) == NULL) 1221 return; 1222 1223 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1224 dlip = (dl_notify_ind_t *)mp->b_rptr; 1225 dlip->dl_primitive = DL_NOTIFY_IND; 1226 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1227 1228 qreply(dsp->ds_wq, mp); 1229 } 1230 1231 /* 1232 * DL_NOTIFY_IND: DL_NOTE_SPEED 1233 */ 1234 static void 1235 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1236 { 1237 mblk_t *mp; 1238 dl_notify_ind_t *dlip; 1239 1240 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1241 return; 1242 1243 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1244 M_PROTO, 0)) == NULL) 1245 return; 1246 1247 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1248 dlip = (dl_notify_ind_t *)mp->b_rptr; 1249 dlip->dl_primitive = DL_NOTIFY_IND; 1250 dlip->dl_notification = DL_NOTE_SPEED; 1251 dlip->dl_data = speed; 1252 1253 qreply(dsp->ds_wq, mp); 1254 } 1255 1256 /* 1257 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1258 */ 1259 static void 1260 str_notify_capab_reneg(dld_str_t *dsp) 1261 { 1262 mblk_t *mp; 1263 dl_notify_ind_t *dlip; 1264 1265 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1266 return; 1267 1268 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1269 M_PROTO, 0)) == NULL) 1270 return; 1271 1272 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1273 dlip = (dl_notify_ind_t *)mp->b_rptr; 1274 dlip->dl_primitive = DL_NOTIFY_IND; 1275 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1276 1277 qreply(dsp->ds_wq, mp); 1278 } 1279 1280 /* 1281 * MAC notification callback. 1282 */ 1283 static void 1284 str_notify(void *arg, mac_notify_type_t type) 1285 { 1286 dld_str_t *dsp = (dld_str_t *)arg; 1287 queue_t *q = dsp->ds_wq; 1288 1289 switch (type) { 1290 case MAC_NOTE_TX: 1291 qenable(q); 1292 break; 1293 1294 case MAC_NOTE_DEVPROMISC: 1295 /* 1296 * Send the appropriate DL_NOTIFY_IND. 1297 */ 1298 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1299 str_notify_promisc_on_phys(dsp); 1300 else 1301 str_notify_promisc_off_phys(dsp); 1302 break; 1303 1304 case MAC_NOTE_PROMISC: 1305 break; 1306 1307 case MAC_NOTE_UNICST: 1308 /* 1309 * This notification is sent whenever the MAC unicast address 1310 * changes. We need to re-cache the address. 1311 */ 1312 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1313 1314 /* 1315 * Send the appropriate DL_NOTIFY_IND. 1316 */ 1317 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1318 break; 1319 1320 case MAC_NOTE_LINK: 1321 /* 1322 * This notification is sent every time the MAC driver 1323 * updates the link state. 1324 */ 1325 switch (mac_link_get(dsp->ds_mh)) { 1326 case LINK_STATE_UP: 1327 /* 1328 * The link is up so send the appropriate 1329 * DL_NOTIFY_IND. 1330 */ 1331 str_notify_link_up(dsp); 1332 1333 /* 1334 * If we can find the link speed then send a 1335 * DL_NOTIFY_IND for that too. 1336 */ 1337 if (dsp->ds_mip->mi_stat[MAC_STAT_IFSPEED]) { 1338 uint64_t val; 1339 1340 val = mac_stat_get(dsp->ds_mh, 1341 MAC_STAT_IFSPEED); 1342 str_notify_speed(dsp, 1343 (uint32_t)(val / 1000ull)); 1344 } 1345 break; 1346 1347 case LINK_STATE_DOWN: 1348 /* 1349 * The link is down so send the appropriate 1350 * DL_NOTIFY_IND. 1351 */ 1352 str_notify_link_down(dsp); 1353 break; 1354 1355 default: 1356 break; 1357 } 1358 break; 1359 1360 case MAC_NOTE_RESOURCE: 1361 /* 1362 * This notification is sent whenever the MAC resources 1363 * change. We need to renegotiate the capabilities. 1364 * Send the appropriate DL_NOTIFY_IND. 1365 */ 1366 str_notify_capab_reneg(dsp); 1367 break; 1368 1369 default: 1370 ASSERT(B_FALSE); 1371 break; 1372 } 1373 } 1374 1375 /* 1376 * Enqueue one or more messages to the transmit queue. 1377 * Caller specifies the insertion position (head/tail). 1378 */ 1379 void 1380 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) 1381 { 1382 mblk_t *tail; 1383 queue_t *q = dsp->ds_wq; 1384 uint_t cnt, msgcnt; 1385 uint_t tot_cnt, tot_msgcnt; 1386 1387 ASSERT(DB_TYPE(mp) == M_DATA); 1388 /* Calculate total size and count of the packet(s) */ 1389 for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; 1390 tail->b_next != NULL; tail = tail->b_next) { 1391 ASSERT(DB_TYPE(tail) == M_DATA); 1392 cnt += msgdsize(tail); 1393 msgcnt++; 1394 } 1395 1396 mutex_enter(&dsp->ds_tx_list_lock); 1397 /* 1398 * If the queue depth would exceed the allowed threshold, drop 1399 * new packet(s) and drain those already in the queue. 1400 */ 1401 tot_cnt = dsp->ds_tx_cnt + cnt; 1402 tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; 1403 1404 if (!head_insert && 1405 (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) { 1406 ASSERT(dsp->ds_tx_qbusy); 1407 mutex_exit(&dsp->ds_tx_list_lock); 1408 freemsgchain(mp); 1409 goto done; 1410 } 1411 1412 /* Update the queue size parameters */ 1413 dsp->ds_tx_cnt = tot_cnt; 1414 dsp->ds_tx_msgcnt = tot_msgcnt; 1415 1416 /* 1417 * If the transmit queue is currently empty and we are 1418 * about to deposit the packet(s) there, switch mode to 1419 * "busy" and raise flow-control condition. 1420 */ 1421 if (!dsp->ds_tx_qbusy) { 1422 dsp->ds_tx_qbusy = B_TRUE; 1423 ASSERT(dsp->ds_tx_flow_mp != NULL); 1424 (void) putq(q, dsp->ds_tx_flow_mp); 1425 dsp->ds_tx_flow_mp = NULL; 1426 } 1427 1428 if (!head_insert) { 1429 /* Tail insertion */ 1430 if (dsp->ds_tx_list_head == NULL) 1431 dsp->ds_tx_list_head = mp; 1432 else 1433 dsp->ds_tx_list_tail->b_next = mp; 1434 dsp->ds_tx_list_tail = tail; 1435 } else { 1436 /* Head insertion */ 1437 tail->b_next = dsp->ds_tx_list_head; 1438 if (dsp->ds_tx_list_head == NULL) 1439 dsp->ds_tx_list_tail = tail; 1440 dsp->ds_tx_list_head = mp; 1441 } 1442 mutex_exit(&dsp->ds_tx_list_lock); 1443 done: 1444 /* Schedule service thread to drain the transmit queue */ 1445 qenable(q); 1446 } 1447 1448 void 1449 dld_tx_flush(dld_str_t *dsp) 1450 { 1451 mutex_enter(&dsp->ds_tx_list_lock); 1452 if (dsp->ds_tx_list_head != NULL) { 1453 freemsgchain(dsp->ds_tx_list_head); 1454 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 1455 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 1456 if (dsp->ds_tx_qbusy) { 1457 dsp->ds_tx_flow_mp = getq(dsp->ds_wq); 1458 ASSERT(dsp->ds_tx_flow_mp != NULL); 1459 dsp->ds_tx_qbusy = B_FALSE; 1460 } 1461 } 1462 mutex_exit(&dsp->ds_tx_list_lock); 1463 } 1464 1465 /* 1466 * Process an M_IOCTL message. 1467 */ 1468 static void 1469 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1470 { 1471 uint_t cmd; 1472 1473 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1474 ASSERT(dsp->ds_type == DLD_DLPI); 1475 1476 switch (cmd) { 1477 case DLIOCRAW: 1478 ioc_raw(dsp, mp); 1479 break; 1480 case DLIOCHDRINFO: 1481 ioc_fast(dsp, mp); 1482 break; 1483 default: 1484 ioc(dsp, mp); 1485 } 1486 } 1487 1488 /* 1489 * DLIOCRAW 1490 */ 1491 static void 1492 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1493 { 1494 queue_t *q = dsp->ds_wq; 1495 1496 rw_enter(&dsp->ds_lock, RW_WRITER); 1497 if (dsp->ds_polling) { 1498 rw_exit(&dsp->ds_lock); 1499 miocnak(q, mp, 0, EPROTO); 1500 return; 1501 } 1502 1503 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 1504 /* 1505 * Set the receive callback. 1506 */ 1507 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); 1508 1509 /* 1510 * Note that raw mode is enabled. 1511 */ 1512 dsp->ds_mode = DLD_RAW; 1513 } 1514 1515 rw_exit(&dsp->ds_lock); 1516 miocack(q, mp, 0, 0); 1517 } 1518 1519 /* 1520 * DLIOCHDRINFO 1521 */ 1522 static void 1523 ioc_fast(dld_str_t *dsp, mblk_t *mp) 1524 { 1525 dl_unitdata_req_t *dlp; 1526 off_t off; 1527 size_t len; 1528 const uint8_t *addr; 1529 uint16_t sap; 1530 mblk_t *nmp; 1531 mblk_t *hmp; 1532 uint_t addr_length; 1533 queue_t *q = dsp->ds_wq; 1534 int err; 1535 dls_channel_t dc; 1536 1537 if (dld_opt & DLD_OPT_NO_FASTPATH) { 1538 err = ENOTSUP; 1539 goto failed; 1540 } 1541 1542 nmp = mp->b_cont; 1543 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 1544 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 1545 dlp->dl_primitive != DL_UNITDATA_REQ)) { 1546 err = EINVAL; 1547 goto failed; 1548 } 1549 1550 off = dlp->dl_dest_addr_offset; 1551 len = dlp->dl_dest_addr_length; 1552 1553 if (!MBLKIN(nmp, off, len)) { 1554 err = EINVAL; 1555 goto failed; 1556 } 1557 1558 rw_enter(&dsp->ds_lock, RW_READER); 1559 if (dsp->ds_dlstate != DL_IDLE) { 1560 rw_exit(&dsp->ds_lock); 1561 err = ENOTSUP; 1562 goto failed; 1563 } 1564 1565 addr_length = dsp->ds_mip->mi_addr_length; 1566 if (len != addr_length + sizeof (uint16_t)) { 1567 rw_exit(&dsp->ds_lock); 1568 err = EINVAL; 1569 goto failed; 1570 } 1571 1572 addr = nmp->b_rptr + off; 1573 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 1574 dc = dsp->ds_dc; 1575 1576 if ((hmp = dls_header(dc, addr, sap, dsp->ds_pri)) == NULL) { 1577 rw_exit(&dsp->ds_lock); 1578 err = ENOMEM; 1579 goto failed; 1580 } 1581 1582 /* 1583 * This is a performance optimization. We originally entered 1584 * as reader and only become writer upon transitioning into 1585 * the DLD_FASTPATH mode for the first time. Otherwise we 1586 * stay as reader and return the fast-path header to IP. 1587 */ 1588 if (dsp->ds_mode != DLD_FASTPATH) { 1589 if (!rw_tryupgrade(&dsp->ds_lock)) { 1590 rw_exit(&dsp->ds_lock); 1591 rw_enter(&dsp->ds_lock, RW_WRITER); 1592 1593 /* 1594 * State may have changed before we re-acquired 1595 * the writer lock in case the upgrade failed. 1596 */ 1597 if (dsp->ds_dlstate != DL_IDLE) { 1598 rw_exit(&dsp->ds_lock); 1599 err = ENOTSUP; 1600 goto failed; 1601 } 1602 } 1603 1604 /* 1605 * Set the receive callback (unless polling is enabled). 1606 */ 1607 if (!dsp->ds_polling) 1608 dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); 1609 1610 /* 1611 * Note that fast-path mode is enabled. 1612 */ 1613 dsp->ds_mode = DLD_FASTPATH; 1614 } 1615 rw_exit(&dsp->ds_lock); 1616 1617 freemsg(nmp->b_cont); 1618 nmp->b_cont = hmp; 1619 1620 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 1621 return; 1622 failed: 1623 miocnak(q, mp, 0, err); 1624 } 1625 1626 /* 1627 * Catch-all handler. 1628 */ 1629 static void 1630 ioc(dld_str_t *dsp, mblk_t *mp) 1631 { 1632 queue_t *q = dsp->ds_wq; 1633 mac_handle_t mh; 1634 1635 rw_enter(&dsp->ds_lock, RW_READER); 1636 if (dsp->ds_dlstate == DL_UNATTACHED) { 1637 rw_exit(&dsp->ds_lock); 1638 miocnak(q, mp, 0, EINVAL); 1639 return; 1640 } 1641 mh = dsp->ds_mh; 1642 ASSERT(mh != NULL); 1643 rw_exit(&dsp->ds_lock); 1644 mac_ioctl(mh, q, mp); 1645 } 1646 1647 /* 1648 * Allocate a new minor number. 1649 */ 1650 static minor_t 1651 dld_minor_hold(boolean_t sleep) 1652 { 1653 minor_t minor; 1654 1655 /* 1656 * Grab a value from the arena. 1657 */ 1658 atomic_add_32(&minor_count, 1); 1659 if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, 1660 (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) { 1661 atomic_add_32(&minor_count, -1); 1662 return (0); 1663 } 1664 1665 return (minor); 1666 } 1667 1668 /* 1669 * Release a previously allocated minor number. 1670 */ 1671 static void 1672 dld_minor_rele(minor_t minor) 1673 { 1674 /* 1675 * Return the value to the arena. 1676 */ 1677 vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); 1678 1679 atomic_add_32(&minor_count, -1); 1680 } 1681