1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * Data-Link Driver 30 */ 31 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/strsubr.h> 35 #include <sys/atomic.h> 36 #include <sys/mkdev.h> 37 #include <sys/vlan.h> 38 #include <sys/dld.h> 39 #include <sys/dld_impl.h> 40 #include <sys/dls_impl.h> 41 #include <inet/common.h> 42 43 static int str_constructor(void *, void *, int); 44 static void str_destructor(void *, void *); 45 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *); 46 static void str_notify_promisc_on_phys(dld_str_t *); 47 static void str_notify_promisc_off_phys(dld_str_t *); 48 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 49 static void str_notify_link_up(dld_str_t *); 50 static void str_notify_link_down(dld_str_t *); 51 static void str_notify_capab_reneg(dld_str_t *); 52 static void str_notify_speed(dld_str_t *, uint32_t); 53 static void str_notify(void *, mac_notify_type_t); 54 55 static void ioc_raw(dld_str_t *, mblk_t *); 56 static void 
ioc_fast(dld_str_t *, mblk_t *); 57 static void ioc(dld_str_t *, mblk_t *); 58 static void dld_ioc(dld_str_t *, mblk_t *); 59 static minor_t dld_minor_hold(boolean_t); 60 static void dld_minor_rele(minor_t); 61 62 static uint32_t str_count; 63 static kmem_cache_t *str_cachep; 64 static vmem_t *minor_arenap; 65 static uint32_t minor_count; 66 67 #define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) 68 #define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) 69 70 /* 71 * Some notes on entry points, flow-control, queueing and locking: 72 * 73 * This driver exports the traditional STREAMS put entry point as well as 74 * the non-STREAMS fast-path transmit routine which is provided to IP via 75 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 76 * and data operations, while the fast-path routine deals only with M_DATA 77 * fast-path packets. Regardless of the entry point, all outbound packets 78 * will end up in str_mdata_fastpath_put(), where they will be delivered to 79 * the MAC driver. 80 * 81 * The transmit logic operates in two modes: a "not busy" mode where the 82 * packets will be delivered to the MAC for a send attempt, or "busy" mode 83 * where they will be enqueued in the internal queue because of flow-control. 84 * Flow-control happens when the MAC driver indicates the packets couldn't 85 * be transmitted due to lack of resources (e.g. running out of descriptors). 86 * In such case, the driver will place a dummy message on its write-side 87 * STREAMS queue so that the queue is marked as "full". Any subsequent 88 * packets arriving at the driver will be enqueued in the internal queue, 89 * which is drained in the context of the service thread that gets scheduled 90 * whenever the driver is in the "busy" mode. 
When all packets have been
 * successfully delivered by MAC and the internal queue is empty, it will
 * transition to the "not busy" mode by removing the dummy message from the
 * write-side STREAMS queue; in effect this will trigger backenabling.
 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due
 * to the above reasons.
 *
 * The driver implements an internal transmit queue independent of STREAMS.
 * This allows for flexibility and provides a fast enqueue/dequeue mechanism
 * compared to the putq() and getq() STREAMS interfaces. The only putq() and
 * getq() operations done by the driver are those related to placing and
 * removing the dummy message to/from the write-side STREAMS queue for flow-
 * control purposes.
 *
 * Locking is done independent of STREAMS due to the driver being fully MT.
 * Threads entering the driver (either from put or service entry points)
 * will most likely be readers, with the exception of a few writer cases
 * such as those handling DLPI attach/detach/bind/unbind/etc. or any of the
 * DLD-related ioctl requests. The DLPI detach case is special, because
 * it involves freeing resources and therefore must be single-threaded.
 * Unfortunately the readers/writers lock can't be used to protect against
 * it, because the lock is dropped prior to the driver calling places where
 * putnext() may be invoked, and such places may depend on those resources
 * to exist. Because of this, the driver always completes the DLPI detach
 * process when there are no other threads running in the driver. This is
 * done by keeping track of the number of threads, such that the last
 * thread leaving the driver will finish the pending DLPI detach operation.
117 */ 118 119 /* 120 * dld_max_q_count is the queue depth threshold used to limit the number of 121 * outstanding packets or bytes allowed in the queue; once this limit is 122 * reached the driver will free any incoming ones until the queue depth 123 * drops below the threshold. 124 * 125 * This buffering is provided to accomodate clients which do not employ 126 * their own buffering scheme, and to handle occasional packet bursts. 127 * Clients which handle their own buffering will receive positive feedback 128 * from this driver as soon as it transitions into the "busy" state, i.e. 129 * when the queue is initially filled up; they will get backenabled once 130 * the queue is empty. 131 * 132 * The value chosen here is rather arbitrary; in future some intelligent 133 * heuristics may be involved which could take into account the hardware's 134 * transmit ring size, etc. 135 */ 136 uint_t dld_max_q_count = (16 * 1024 *1024); 137 138 static dev_info_t * 139 dld_finddevinfo(dev_t dev) 140 { 141 minor_t minor = getminor(dev); 142 char *drvname = ddi_major_to_name(getmajor(dev)); 143 char name[MAXNAMELEN]; 144 dls_vlan_t *dvp = NULL; 145 dev_info_t *dip = NULL; 146 147 if (drvname == NULL || minor == 0 || minor > DLD_MAX_PPA + 1) 148 return (NULL); 149 150 (void) snprintf(name, MAXNAMELEN, "%s%d", drvname, (int)minor - 1); 151 if (dls_vlan_hold(name, &dvp, B_FALSE) != 0) 152 return (NULL); 153 154 dip = mac_devinfo_get(dvp->dv_dlp->dl_mh); 155 dls_vlan_rele(dvp); 156 return (dip); 157 } 158 159 /* 160 * devo_getinfo: getinfo(9e) 161 */ 162 /*ARGSUSED*/ 163 int 164 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 165 { 166 dev_info_t *devinfo; 167 minor_t minor = getminor((dev_t)arg); 168 int rc = DDI_FAILURE; 169 170 switch (cmd) { 171 case DDI_INFO_DEVT2DEVINFO: 172 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 173 *(dev_info_t **)resp = devinfo; 174 rc = DDI_SUCCESS; 175 } 176 break; 177 case DDI_INFO_DEVT2INSTANCE: 178 if (minor > 0 
&& minor <= DLD_MAX_PPA + 1) { 179 *resp = (void *)(minor - 1); 180 rc = DDI_SUCCESS; 181 } 182 break; 183 } 184 return (rc); 185 } 186 187 /* 188 * qi_qopen: open(9e) 189 */ 190 /*ARGSUSED*/ 191 int 192 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 193 { 194 dld_str_t *dsp; 195 major_t major; 196 minor_t minor; 197 int err; 198 199 if (sflag == MODOPEN) 200 return (ENOTSUP); 201 202 /* 203 * This is a cloning driver and therefore each queue should only 204 * ever get opened once. 205 */ 206 if (rq->q_ptr != NULL) 207 return (EBUSY); 208 209 major = getmajor(*devp); 210 minor = getminor(*devp); 211 if (minor > DLD_MAX_MINOR) 212 return (ENODEV); 213 214 /* 215 * Create a new dld_str_t for the stream. This will grab a new minor 216 * number that will be handed back in the cloned dev_t. Creation may 217 * fail if we can't allocate the dummy mblk used for flow-control. 218 */ 219 dsp = dld_str_create(rq, DLD_DLPI, major, 220 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 221 if (dsp == NULL) 222 return (ENOSR); 223 224 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 225 if (minor != 0) { 226 /* 227 * Style 1 open 228 */ 229 230 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 231 goto failed; 232 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 233 } else { 234 (void) qassociate(rq, -1); 235 } 236 237 /* 238 * Enable the queue srv(9e) routine. 239 */ 240 qprocson(rq); 241 242 /* 243 * Construct a cloned dev_t to hand back. 244 */ 245 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 246 return (0); 247 248 failed: 249 dld_str_destroy(dsp); 250 return (err); 251 } 252 253 /* 254 * qi_qclose: close(9e) 255 */ 256 int 257 dld_close(queue_t *rq) 258 { 259 dld_str_t *dsp = rq->q_ptr; 260 261 /* 262 * Wait until pending requests are processed. 
263 */ 264 mutex_enter(&dsp->ds_thr_lock); 265 while (dsp->ds_pending_cnt > 0) 266 cv_wait(&dsp->ds_pending_cv, &dsp->ds_thr_lock); 267 mutex_exit(&dsp->ds_thr_lock); 268 269 /* 270 * Disable the queue srv(9e) routine. 271 */ 272 qprocsoff(rq); 273 274 /* 275 * At this point we can not be entered by any threads via STREAMS 276 * or the direct call interface, which is available only to IP. 277 * After the interface is unplumbed, IP wouldn't have any reference 278 * to this instance, and therefore we are now effectively single 279 * threaded and don't require any lock protection. Flush all 280 * pending packets which are sitting in the transmit queue. 281 */ 282 ASSERT(dsp->ds_thr == 0); 283 dld_tx_flush(dsp); 284 285 /* 286 * This stream was open to a provider node. Check to see 287 * if it has been cleanly shut down. 288 */ 289 if (dsp->ds_dlstate != DL_UNATTACHED) { 290 /* 291 * The stream is either open to a style 1 provider or 292 * this is not clean shutdown. Detach from the PPA. 293 * (This is still ok even in the style 1 case). 
294 */ 295 dld_str_detach(dsp); 296 } 297 298 dld_str_destroy(dsp); 299 return (0); 300 } 301 302 /* 303 * qi_qputp: put(9e) 304 */ 305 void 306 dld_wput(queue_t *wq, mblk_t *mp) 307 { 308 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 309 310 DLD_ENTER(dsp); 311 312 switch (DB_TYPE(mp)) { 313 case M_DATA: 314 rw_enter(&dsp->ds_lock, RW_READER); 315 if (dsp->ds_dlstate != DL_IDLE || 316 dsp->ds_mode == DLD_UNITDATA) { 317 freemsg(mp); 318 } else if (dsp->ds_mode == DLD_FASTPATH) { 319 str_mdata_fastpath_put(dsp, mp); 320 } else if (dsp->ds_mode == DLD_RAW) { 321 str_mdata_raw_put(dsp, mp); 322 } 323 rw_exit(&dsp->ds_lock); 324 break; 325 case M_PROTO: 326 case M_PCPROTO: 327 dld_proto(dsp, mp); 328 break; 329 case M_IOCTL: 330 dld_ioc(dsp, mp); 331 break; 332 case M_FLUSH: 333 if (*mp->b_rptr & FLUSHW) { 334 dld_tx_flush(dsp); 335 *mp->b_rptr &= ~FLUSHW; 336 } 337 338 if (*mp->b_rptr & FLUSHR) { 339 qreply(wq, mp); 340 } else { 341 freemsg(mp); 342 } 343 break; 344 default: 345 freemsg(mp); 346 break; 347 } 348 349 DLD_EXIT(dsp); 350 } 351 352 /* 353 * qi_srvp: srv(9e) 354 */ 355 void 356 dld_wsrv(queue_t *wq) 357 { 358 mblk_t *mp; 359 dld_str_t *dsp = wq->q_ptr; 360 361 DLD_ENTER(dsp); 362 rw_enter(&dsp->ds_lock, RW_READER); 363 /* 364 * Grab all packets (chained via b_next) off our transmit queue 365 * and try to send them all to the MAC layer. Since the queue 366 * is independent of streams, we are able to dequeue all messages 367 * at once without looping through getq() and manually chaining 368 * them. Note that the queue size parameters (byte and message 369 * counts) are cleared as well, but we postpone the backenabling 370 * until after the MAC transmit since some packets may end up 371 * back at our transmit queue. 
372 */ 373 mutex_enter(&dsp->ds_tx_list_lock); 374 if ((mp = dsp->ds_tx_list_head) == NULL) { 375 ASSERT(!dsp->ds_tx_qbusy); 376 ASSERT(dsp->ds_tx_flow_mp != NULL); 377 ASSERT(dsp->ds_tx_list_head == NULL); 378 ASSERT(dsp->ds_tx_list_tail == NULL); 379 ASSERT(dsp->ds_tx_cnt == 0); 380 ASSERT(dsp->ds_tx_msgcnt == 0); 381 mutex_exit(&dsp->ds_tx_list_lock); 382 goto done; 383 } 384 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 385 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 386 mutex_exit(&dsp->ds_tx_list_lock); 387 388 /* 389 * Discard packets unless we are attached and bound; note that 390 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 391 * because regardless of the mode all transmit will end up in 392 * str_mdata_fastpath_put() where the packets may be queued. 393 */ 394 ASSERT(DB_TYPE(mp) == M_DATA); 395 if (dsp->ds_dlstate != DL_IDLE) { 396 freemsgchain(mp); 397 goto done; 398 } 399 400 /* 401 * Attempt to transmit one or more packets. If the MAC can't 402 * send them all, re-queue the packet(s) at the beginning of 403 * the transmit queue to avoid any re-ordering. 404 */ 405 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 406 dld_tx_enqueue(dsp, mp, B_TRUE); 407 408 /* 409 * Grab the list lock again and check if the transmit queue is 410 * really empty; if so, lift up flow-control and backenable any 411 * writer queues. If the queue is not empty, schedule service 412 * thread to drain it. 
413 */ 414 mutex_enter(&dsp->ds_tx_list_lock); 415 if (dsp->ds_tx_list_head == NULL) { 416 dsp->ds_tx_flow_mp = getq(wq); 417 ASSERT(dsp->ds_tx_flow_mp != NULL); 418 dsp->ds_tx_qbusy = B_FALSE; 419 } 420 mutex_exit(&dsp->ds_tx_list_lock); 421 done: 422 rw_exit(&dsp->ds_lock); 423 DLD_EXIT(dsp); 424 } 425 426 void 427 dld_init_ops(struct dev_ops *ops, const char *name) 428 { 429 struct streamtab *stream; 430 struct qinit *rq, *wq; 431 struct module_info *modinfo; 432 433 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 434 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 435 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 436 modinfo->mi_minpsz = 0; 437 modinfo->mi_maxpsz = 64*1024; 438 modinfo->mi_hiwat = 1; 439 modinfo->mi_lowat = 0; 440 441 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 442 rq->qi_qopen = dld_open; 443 rq->qi_qclose = dld_close; 444 rq->qi_minfo = modinfo; 445 446 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 447 wq->qi_putp = (pfi_t)dld_wput; 448 wq->qi_srvp = (pfi_t)dld_wsrv; 449 wq->qi_minfo = modinfo; 450 451 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 452 stream->st_rdinit = rq; 453 stream->st_wrinit = wq; 454 ops->devo_cb_ops->cb_str = stream; 455 456 ops->devo_getinfo = &dld_getinfo; 457 } 458 459 void 460 dld_fini_ops(struct dev_ops *ops) 461 { 462 struct streamtab *stream; 463 struct qinit *rq, *wq; 464 struct module_info *modinfo; 465 466 stream = ops->devo_cb_ops->cb_str; 467 rq = stream->st_rdinit; 468 wq = stream->st_wrinit; 469 modinfo = rq->qi_minfo; 470 ASSERT(wq->qi_minfo == modinfo); 471 472 kmem_free(stream, sizeof (struct streamtab)); 473 kmem_free(wq, sizeof (struct qinit)); 474 kmem_free(rq, sizeof (struct qinit)); 475 kmem_free(modinfo->mi_idname, FMNAMESZ); 476 kmem_free(modinfo, sizeof (struct module_info)); 477 } 478 479 /* 480 * Initialize this module's data structures. 481 */ 482 void 483 dld_str_init(void) 484 { 485 /* 486 * Create dld_str_t object cache. 
487 */ 488 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 489 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 490 ASSERT(str_cachep != NULL); 491 492 /* 493 * Allocate a vmem arena to manage minor numbers. The range of the 494 * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal 495 * minor number). 496 */ 497 minor_arenap = vmem_create("dld_minor_arena", 498 MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, 499 VM_SLEEP | VMC_IDENTIFIER); 500 ASSERT(minor_arenap != NULL); 501 } 502 503 /* 504 * Tear down this module's data structures. 505 */ 506 int 507 dld_str_fini(void) 508 { 509 /* 510 * Make sure that there are no objects in use. 511 */ 512 if (str_count != 0) 513 return (EBUSY); 514 515 /* 516 * Check to see if there are any minor numbers still in use. 517 */ 518 if (minor_count != 0) 519 return (EBUSY); 520 521 /* 522 * Destroy object cache. 523 */ 524 kmem_cache_destroy(str_cachep); 525 vmem_destroy(minor_arenap); 526 return (0); 527 } 528 529 /* 530 * Create a new dld_str_t object. 531 */ 532 dld_str_t * 533 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 534 { 535 dld_str_t *dsp; 536 537 /* 538 * Allocate an object from the cache. 539 */ 540 atomic_add_32(&str_count, 1); 541 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 542 543 /* 544 * Allocate the dummy mblk for flow-control. 545 */ 546 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 547 if (dsp->ds_tx_flow_mp == NULL) { 548 kmem_cache_free(str_cachep, dsp); 549 atomic_add_32(&str_count, -1); 550 return (NULL); 551 } 552 dsp->ds_type = type; 553 dsp->ds_major = major; 554 dsp->ds_style = style; 555 556 /* 557 * Initialize the queue pointers. 558 */ 559 ASSERT(RD(rq) == rq); 560 dsp->ds_rq = rq; 561 dsp->ds_wq = WR(rq); 562 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 563 564 /* 565 * We want explicit control over our write-side STREAMS queue 566 * where the dummy mblk gets added/removed for flow-control. 
567 */ 568 noenable(WR(rq)); 569 570 return (dsp); 571 } 572 573 /* 574 * Destroy a dld_str_t object. 575 */ 576 void 577 dld_str_destroy(dld_str_t *dsp) 578 { 579 queue_t *rq; 580 queue_t *wq; 581 582 /* 583 * Clear the queue pointers. 584 */ 585 rq = dsp->ds_rq; 586 wq = dsp->ds_wq; 587 ASSERT(wq == WR(rq)); 588 589 rq->q_ptr = wq->q_ptr = NULL; 590 dsp->ds_rq = dsp->ds_wq = NULL; 591 592 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 593 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 594 ASSERT(dsp->ds_tx_list_head == NULL); 595 ASSERT(dsp->ds_tx_list_tail == NULL); 596 ASSERT(dsp->ds_tx_cnt == 0); 597 ASSERT(dsp->ds_tx_msgcnt == 0); 598 ASSERT(!dsp->ds_tx_qbusy); 599 600 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 601 ASSERT(dsp->ds_thr == 0); 602 ASSERT(dsp->ds_pending_req == NULL); 603 604 /* 605 * Reinitialize all the flags. 606 */ 607 dsp->ds_notifications = 0; 608 dsp->ds_passivestate = DLD_UNINITIALIZED; 609 dsp->ds_mode = DLD_UNITDATA; 610 611 /* 612 * Free the dummy mblk if exists. 613 */ 614 if (dsp->ds_tx_flow_mp != NULL) { 615 freeb(dsp->ds_tx_flow_mp); 616 dsp->ds_tx_flow_mp = NULL; 617 } 618 /* 619 * Free the object back to the cache. 620 */ 621 kmem_cache_free(str_cachep, dsp); 622 atomic_add_32(&str_count, -1); 623 } 624 625 /* 626 * kmem_cache contructor function: see kmem_cache_create(9f). 627 */ 628 /*ARGSUSED*/ 629 static int 630 str_constructor(void *buf, void *cdrarg, int kmflags) 631 { 632 dld_str_t *dsp = buf; 633 634 bzero(buf, sizeof (dld_str_t)); 635 636 /* 637 * Allocate a new minor number. 638 */ 639 if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0) 640 return (-1); 641 642 /* 643 * Initialize the DLPI state machine. 
644 */ 645 dsp->ds_dlstate = DL_UNATTACHED; 646 647 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 648 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 649 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 650 cv_init(&dsp->ds_pending_cv, NULL, CV_DRIVER, NULL); 651 652 return (0); 653 } 654 655 /* 656 * kmem_cache destructor function. 657 */ 658 /*ARGSUSED*/ 659 static void 660 str_destructor(void *buf, void *cdrarg) 661 { 662 dld_str_t *dsp = buf; 663 664 /* 665 * Make sure the DLPI state machine was reset. 666 */ 667 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 668 669 /* 670 * Make sure the data-link interface was closed. 671 */ 672 ASSERT(dsp->ds_mh == NULL); 673 ASSERT(dsp->ds_dc == NULL); 674 675 /* 676 * Make sure enabled notifications are cleared. 677 */ 678 ASSERT(dsp->ds_notifications == 0); 679 680 /* 681 * Make sure polling is disabled. 682 */ 683 ASSERT(!dsp->ds_polling); 684 685 /* 686 * Release the minor number. 687 */ 688 dld_minor_rele(dsp->ds_minor); 689 690 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 691 rw_destroy(&dsp->ds_lock); 692 693 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 694 mutex_destroy(&dsp->ds_tx_list_lock); 695 ASSERT(dsp->ds_tx_flow_mp == NULL); 696 697 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 698 mutex_destroy(&dsp->ds_thr_lock); 699 ASSERT(dsp->ds_pending_req == NULL); 700 ASSERT(dsp->ds_pending_op == NULL); 701 ASSERT(dsp->ds_pending_cnt == 0); 702 cv_destroy(&dsp->ds_pending_cv); 703 } 704 705 /* 706 * M_DATA put (IP fast-path mode) 707 */ 708 void 709 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 710 { 711 /* 712 * This function can be called from within dld or from an upper 713 * layer protocol (currently only tcp). If we are in the busy 714 * mode enqueue the packet(s) and return. Otherwise hand them 715 * over to the MAC driver for transmission; any remaining one(s) 716 * which didn't get sent will be queued. 717 * 718 * Note here that we don't grab the list lock prior to checking 719 * the busy flag. 
This is okay, because a missed transition 720 * will not cause any packet reordering for any particular TCP 721 * connection (which is single-threaded). The enqueue routine 722 * will atomically set the busy flag and schedule the service 723 * thread to run; the flag is only cleared by the service thread 724 * when there is no more packet to be transmitted. 725 */ 726 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 727 dld_tx_enqueue(dsp, mp, B_FALSE); 728 } 729 730 /* 731 * M_DATA put (raw mode) 732 */ 733 void 734 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 735 { 736 struct ether_header *ehp; 737 mblk_t *bp; 738 size_t size; 739 size_t hdrlen; 740 741 size = MBLKL(mp); 742 if (size < sizeof (struct ether_header)) 743 goto discard; 744 745 hdrlen = sizeof (struct ether_header); 746 747 ehp = (struct ether_header *)mp->b_rptr; 748 if (ntohs(ehp->ether_type) == VLAN_TPID) { 749 struct ether_vlan_header *evhp; 750 751 if (size < sizeof (struct ether_vlan_header)) 752 goto discard; 753 754 /* 755 * Replace vtag with our own 756 */ 757 evhp = (struct ether_vlan_header *)ehp; 758 evhp->ether_tci = htons(VLAN_TCI(dsp->ds_pri, 759 ETHER_CFI, dsp->ds_vid)); 760 hdrlen = sizeof (struct ether_vlan_header); 761 } 762 763 /* 764 * Check the packet is not too big and that any remaining 765 * fragment list is composed entirely of M_DATA messages. (We 766 * know the first fragment was M_DATA otherwise we could not 767 * have got here). 768 */ 769 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 770 if (DB_TYPE(bp) != M_DATA) 771 goto discard; 772 size += MBLKL(bp); 773 } 774 775 if (size > dsp->ds_mip->mi_sdu_max + hdrlen) 776 goto discard; 777 778 str_mdata_fastpath_put(dsp, mp); 779 return; 780 781 discard: 782 freemsg(mp); 783 } 784 785 /* 786 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 
787 */ 788 int 789 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 790 { 791 int err; 792 const char *drvname; 793 char name[MAXNAMELEN]; 794 dls_channel_t dc; 795 uint_t addr_length; 796 797 ASSERT(dsp->ds_dc == NULL); 798 799 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 800 return (EINVAL); 801 802 (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); 803 804 if (strcmp(drvname, "aggr") != 0 && 805 qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 806 return (EINVAL); 807 808 /* 809 * Open a channel. 810 */ 811 if ((err = dls_open(name, &dc)) != 0) { 812 (void) qassociate(dsp->ds_wq, -1); 813 return (err); 814 } 815 816 /* 817 * Cache the MAC interface handle, a pointer to the immutable MAC 818 * information and the current and 'factory' MAC address. 819 */ 820 dsp->ds_mh = dls_mac(dc); 821 dsp->ds_mip = mac_info(dsp->ds_mh); 822 823 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 824 825 addr_length = dsp->ds_mip->mi_addr_length; 826 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 827 828 /* 829 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 830 * a non-VLAN interface). 831 */ 832 dsp->ds_vid = dls_vid(dc); 833 834 /* 835 * Set the default packet priority. 836 */ 837 dsp->ds_pri = 0; 838 839 /* 840 * Add a notify function so that the we get updates from the MAC. 841 */ 842 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 843 844 dsp->ds_dc = dc; 845 dsp->ds_dlstate = DL_UNBOUND; 846 847 return (0); 848 } 849 850 /* 851 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 852 * from close(2) for style 2. 853 */ 854 void 855 dld_str_detach(dld_str_t *dsp) 856 { 857 ASSERT(dsp->ds_thr == 0); 858 859 /* 860 * Remove the notify function. 861 */ 862 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 863 864 /* 865 * Clear the polling and promisc flags. 
866 */ 867 dsp->ds_polling = B_FALSE; 868 dsp->ds_soft_ring = B_FALSE; 869 dsp->ds_promisc = 0; 870 871 /* 872 * Close the channel. 873 */ 874 dls_close(dsp->ds_dc); 875 dsp->ds_dc = NULL; 876 dsp->ds_mh = NULL; 877 878 (void) qassociate(dsp->ds_wq, -1); 879 880 /* 881 * Re-initialize the DLPI state machine. 882 */ 883 dsp->ds_dlstate = DL_UNATTACHED; 884 885 } 886 887 /* 888 * Raw mode receive function. 889 */ 890 /*ARGSUSED*/ 891 void 892 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 893 size_t header_length) 894 { 895 dld_str_t *dsp = (dld_str_t *)arg; 896 mblk_t *next; 897 898 ASSERT(mp != NULL); 899 do { 900 /* 901 * Get the pointer to the next packet in the chain and then 902 * clear b_next before the packet gets passed on. 903 */ 904 next = mp->b_next; 905 mp->b_next = NULL; 906 907 /* 908 * Wind back b_rptr to point at the MAC header. 909 */ 910 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 911 mp->b_rptr -= header_length; 912 if (header_length == sizeof (struct ether_vlan_header)) { 913 /* 914 * Strip off the vtag 915 */ 916 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 917 2 * ETHERADDRL); 918 mp->b_rptr += VLAN_TAGSZ; 919 } 920 921 /* 922 * Pass the packet on. 923 */ 924 putnext(dsp->ds_rq, mp); 925 926 /* 927 * Move on to the next packet in the chain. 928 */ 929 mp = next; 930 } while (mp != NULL); 931 } 932 933 /* 934 * Fast-path receive function. 935 */ 936 /*ARGSUSED*/ 937 void 938 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 939 size_t header_length) 940 { 941 dld_str_t *dsp = (dld_str_t *)arg; 942 mblk_t *next; 943 944 ASSERT(mp != NULL); 945 do { 946 /* 947 * Get the pointer to the next packet in the chain and then 948 * clear b_next before the packet gets passed on. 949 */ 950 next = mp->b_next; 951 mp->b_next = NULL; 952 953 /* 954 * Pass the packet on. 955 */ 956 putnext(dsp->ds_rq, mp); 957 958 /* 959 * Move on to the next packet in the chain. 
960 */ 961 mp = next; 962 } while (mp != NULL); 963 } 964 965 /* 966 * Default receive function (send DL_UNITDATA_IND messages). 967 */ 968 /*ARGSUSED*/ 969 void 970 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 971 size_t header_length) 972 { 973 dld_str_t *dsp = (dld_str_t *)arg; 974 mblk_t *ud_mp; 975 mblk_t *next; 976 977 ASSERT(mp != NULL); 978 do { 979 /* 980 * Get the pointer to the next packet in the chain and then 981 * clear b_next before the packet gets passed on. 982 */ 983 next = mp->b_next; 984 mp->b_next = NULL; 985 986 /* 987 * Wind back b_rptr to point at the MAC header. 988 */ 989 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 990 mp->b_rptr -= header_length; 991 992 /* 993 * Create the DL_UNITDATA_IND M_PROTO. 994 */ 995 if ((ud_mp = str_unitdata_ind(dsp, mp)) == NULL) { 996 freemsgchain(mp); 997 return; 998 } 999 1000 /* 1001 * Advance b_rptr to point at the payload again. 1002 */ 1003 mp->b_rptr += header_length; 1004 1005 /* 1006 * Prepend the DL_UNITDATA_IND. 1007 */ 1008 ud_mp->b_cont = mp; 1009 1010 /* 1011 * Send the message. 1012 */ 1013 putnext(dsp->ds_rq, ud_mp); 1014 1015 /* 1016 * Move on to the next packet in the chain. 1017 */ 1018 mp = next; 1019 } while (mp != NULL); 1020 } 1021 1022 /* 1023 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1024 * current state of the interface. 1025 */ 1026 void 1027 dld_str_notify_ind(dld_str_t *dsp) 1028 { 1029 mac_notify_type_t type; 1030 1031 for (type = 0; type < MAC_NNOTE; type++) 1032 str_notify(dsp, type); 1033 } 1034 1035 typedef struct dl_unitdata_ind_wrapper { 1036 dl_unitdata_ind_t dl_unitdata; 1037 uint8_t dl_dest_addr[MAXADDRLEN + sizeof (uint16_t)]; 1038 uint8_t dl_src_addr[MAXADDRLEN + sizeof (uint16_t)]; 1039 } dl_unitdata_ind_wrapper_t; 1040 1041 /* 1042 * Create a DL_UNITDATA_IND M_PROTO message. 
1043 */ 1044 static mblk_t * 1045 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp) 1046 { 1047 mblk_t *nmp; 1048 dl_unitdata_ind_wrapper_t *dlwp; 1049 dl_unitdata_ind_t *dlp; 1050 dls_header_info_t dhi; 1051 uint_t addr_length; 1052 uint8_t *daddr; 1053 uint8_t *saddr; 1054 1055 /* 1056 * Get the packet header information. 1057 */ 1058 dls_header_info(dsp->ds_dc, mp, &dhi); 1059 1060 /* 1061 * Allocate a message large enough to contain the wrapper structure 1062 * defined above. 1063 */ 1064 if ((nmp = mexchange(dsp->ds_wq, NULL, 1065 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1066 DL_UNITDATA_IND)) == NULL) 1067 return (NULL); 1068 1069 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1070 1071 dlp = &(dlwp->dl_unitdata); 1072 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1073 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1074 1075 /* 1076 * Copy in the destination address. 1077 */ 1078 addr_length = dsp->ds_mip->mi_addr_length; 1079 daddr = dlwp->dl_dest_addr; 1080 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1081 bcopy(dhi.dhi_daddr, daddr, addr_length); 1082 1083 /* 1084 * Set the destination DLSAP to our bound DLSAP value. 1085 */ 1086 *(uint16_t *)(daddr + addr_length) = dsp->ds_sap; 1087 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1088 1089 /* 1090 * If the destination address was a group address then 1091 * dl_group_address field should be non-zero. 1092 */ 1093 dlp->dl_group_address = dhi.dhi_isgroup; 1094 1095 /* 1096 * Copy in the source address. 1097 */ 1098 saddr = dlwp->dl_src_addr; 1099 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1100 bcopy(dhi.dhi_saddr, saddr, addr_length); 1101 1102 /* 1103 * Set the source DLSAP to the packet ethertype. 
1104 */ 1105 *(uint16_t *)(saddr + addr_length) = dhi.dhi_ethertype; 1106 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1107 1108 return (nmp); 1109 } 1110 1111 /* 1112 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1113 */ 1114 static void 1115 str_notify_promisc_on_phys(dld_str_t *dsp) 1116 { 1117 mblk_t *mp; 1118 dl_notify_ind_t *dlip; 1119 1120 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1121 return; 1122 1123 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1124 M_PROTO, 0)) == NULL) 1125 return; 1126 1127 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1128 dlip = (dl_notify_ind_t *)mp->b_rptr; 1129 dlip->dl_primitive = DL_NOTIFY_IND; 1130 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1131 1132 qreply(dsp->ds_wq, mp); 1133 } 1134 1135 /* 1136 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1137 */ 1138 static void 1139 str_notify_promisc_off_phys(dld_str_t *dsp) 1140 { 1141 mblk_t *mp; 1142 dl_notify_ind_t *dlip; 1143 1144 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1145 return; 1146 1147 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1148 M_PROTO, 0)) == NULL) 1149 return; 1150 1151 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1152 dlip = (dl_notify_ind_t *)mp->b_rptr; 1153 dlip->dl_primitive = DL_NOTIFY_IND; 1154 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1155 1156 qreply(dsp->ds_wq, mp); 1157 } 1158 1159 /* 1160 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1161 */ 1162 static void 1163 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1164 { 1165 mblk_t *mp; 1166 dl_notify_ind_t *dlip; 1167 uint_t addr_length; 1168 uint16_t ethertype; 1169 1170 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1171 return; 1172 1173 addr_length = dsp->ds_mip->mi_addr_length; 1174 if ((mp = mexchange(dsp->ds_wq, NULL, 1175 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1176 M_PROTO, 0)) == NULL) 1177 return; 1178 1179 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1180 dlip = (dl_notify_ind_t 
*)mp->b_rptr; 1181 dlip->dl_primitive = DL_NOTIFY_IND; 1182 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1183 dlip->dl_data = DL_CURR_PHYS_ADDR; 1184 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1185 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1186 1187 bcopy(addr, &dlip[1], addr_length); 1188 1189 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1190 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = 1191 ethertype; 1192 1193 qreply(dsp->ds_wq, mp); 1194 } 1195 1196 /* 1197 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1198 */ 1199 static void 1200 str_notify_link_up(dld_str_t *dsp) 1201 { 1202 mblk_t *mp; 1203 dl_notify_ind_t *dlip; 1204 1205 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1206 return; 1207 1208 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1209 M_PROTO, 0)) == NULL) 1210 return; 1211 1212 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1213 dlip = (dl_notify_ind_t *)mp->b_rptr; 1214 dlip->dl_primitive = DL_NOTIFY_IND; 1215 dlip->dl_notification = DL_NOTE_LINK_UP; 1216 1217 qreply(dsp->ds_wq, mp); 1218 } 1219 1220 /* 1221 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1222 */ 1223 static void 1224 str_notify_link_down(dld_str_t *dsp) 1225 { 1226 mblk_t *mp; 1227 dl_notify_ind_t *dlip; 1228 1229 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1230 return; 1231 1232 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1233 M_PROTO, 0)) == NULL) 1234 return; 1235 1236 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1237 dlip = (dl_notify_ind_t *)mp->b_rptr; 1238 dlip->dl_primitive = DL_NOTIFY_IND; 1239 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1240 1241 qreply(dsp->ds_wq, mp); 1242 } 1243 1244 /* 1245 * DL_NOTIFY_IND: DL_NOTE_SPEED 1246 */ 1247 static void 1248 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1249 { 1250 mblk_t *mp; 1251 dl_notify_ind_t *dlip; 1252 1253 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1254 return; 1255 1256 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1257 M_PROTO, 
0)) == NULL) 1258 return; 1259 1260 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1261 dlip = (dl_notify_ind_t *)mp->b_rptr; 1262 dlip->dl_primitive = DL_NOTIFY_IND; 1263 dlip->dl_notification = DL_NOTE_SPEED; 1264 dlip->dl_data = speed; 1265 1266 qreply(dsp->ds_wq, mp); 1267 } 1268 1269 /* 1270 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1271 */ 1272 static void 1273 str_notify_capab_reneg(dld_str_t *dsp) 1274 { 1275 mblk_t *mp; 1276 dl_notify_ind_t *dlip; 1277 1278 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1279 return; 1280 1281 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1282 M_PROTO, 0)) == NULL) 1283 return; 1284 1285 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1286 dlip = (dl_notify_ind_t *)mp->b_rptr; 1287 dlip->dl_primitive = DL_NOTIFY_IND; 1288 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1289 1290 qreply(dsp->ds_wq, mp); 1291 } 1292 1293 /* 1294 * MAC notification callback. 1295 */ 1296 static void 1297 str_notify(void *arg, mac_notify_type_t type) 1298 { 1299 dld_str_t *dsp = (dld_str_t *)arg; 1300 queue_t *q = dsp->ds_wq; 1301 1302 switch (type) { 1303 case MAC_NOTE_TX: 1304 qenable(q); 1305 break; 1306 1307 case MAC_NOTE_DEVPROMISC: 1308 /* 1309 * Send the appropriate DL_NOTIFY_IND. 1310 */ 1311 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1312 str_notify_promisc_on_phys(dsp); 1313 else 1314 str_notify_promisc_off_phys(dsp); 1315 break; 1316 1317 case MAC_NOTE_PROMISC: 1318 break; 1319 1320 case MAC_NOTE_UNICST: 1321 /* 1322 * This notification is sent whenever the MAC unicast address 1323 * changes. We need to re-cache the address. 1324 */ 1325 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1326 1327 /* 1328 * Send the appropriate DL_NOTIFY_IND. 1329 */ 1330 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1331 break; 1332 1333 case MAC_NOTE_LINK: 1334 /* 1335 * This notification is sent every time the MAC driver 1336 * updates the link state. 
1337 */ 1338 switch (mac_link_get(dsp->ds_mh)) { 1339 case LINK_STATE_UP: 1340 /* 1341 * The link is up so send the appropriate 1342 * DL_NOTIFY_IND. 1343 */ 1344 str_notify_link_up(dsp); 1345 1346 /* 1347 * If we can find the link speed then send a 1348 * DL_NOTIFY_IND for that too. 1349 */ 1350 if (dsp->ds_mip->mi_stat[MAC_STAT_IFSPEED]) { 1351 uint64_t val; 1352 1353 val = mac_stat_get(dsp->ds_mh, 1354 MAC_STAT_IFSPEED); 1355 str_notify_speed(dsp, 1356 (uint32_t)(val / 1000ull)); 1357 } 1358 break; 1359 1360 case LINK_STATE_DOWN: 1361 /* 1362 * The link is down so send the appropriate 1363 * DL_NOTIFY_IND. 1364 */ 1365 str_notify_link_down(dsp); 1366 break; 1367 1368 default: 1369 break; 1370 } 1371 break; 1372 1373 case MAC_NOTE_RESOURCE: 1374 /* 1375 * This notification is sent whenever the MAC resources 1376 * change. We need to renegotiate the capabilities. 1377 * Send the appropriate DL_NOTIFY_IND. 1378 */ 1379 str_notify_capab_reneg(dsp); 1380 break; 1381 1382 default: 1383 ASSERT(B_FALSE); 1384 break; 1385 } 1386 } 1387 1388 /* 1389 * Enqueue one or more messages to the transmit queue. 1390 * Caller specifies the insertion position (head/tail). 1391 */ 1392 void 1393 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) 1394 { 1395 mblk_t *tail; 1396 queue_t *q = dsp->ds_wq; 1397 uint_t cnt, msgcnt; 1398 uint_t tot_cnt, tot_msgcnt; 1399 1400 ASSERT(DB_TYPE(mp) == M_DATA); 1401 /* Calculate total size and count of the packet(s) */ 1402 for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; 1403 tail->b_next != NULL; tail = tail->b_next) { 1404 ASSERT(DB_TYPE(tail) == M_DATA); 1405 cnt += msgdsize(tail); 1406 msgcnt++; 1407 } 1408 1409 mutex_enter(&dsp->ds_tx_list_lock); 1410 /* 1411 * If the queue depth would exceed the allowed threshold, drop 1412 * new packet(s) and drain those already in the queue. 
1413 */ 1414 tot_cnt = dsp->ds_tx_cnt + cnt; 1415 tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; 1416 1417 if (!head_insert && 1418 (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) { 1419 ASSERT(dsp->ds_tx_qbusy); 1420 mutex_exit(&dsp->ds_tx_list_lock); 1421 freemsgchain(mp); 1422 goto done; 1423 } 1424 1425 /* Update the queue size parameters */ 1426 dsp->ds_tx_cnt = tot_cnt; 1427 dsp->ds_tx_msgcnt = tot_msgcnt; 1428 1429 /* 1430 * If the transmit queue is currently empty and we are 1431 * about to deposit the packet(s) there, switch mode to 1432 * "busy" and raise flow-control condition. 1433 */ 1434 if (!dsp->ds_tx_qbusy) { 1435 dsp->ds_tx_qbusy = B_TRUE; 1436 ASSERT(dsp->ds_tx_flow_mp != NULL); 1437 (void) putq(q, dsp->ds_tx_flow_mp); 1438 dsp->ds_tx_flow_mp = NULL; 1439 } 1440 1441 if (!head_insert) { 1442 /* Tail insertion */ 1443 if (dsp->ds_tx_list_head == NULL) 1444 dsp->ds_tx_list_head = mp; 1445 else 1446 dsp->ds_tx_list_tail->b_next = mp; 1447 dsp->ds_tx_list_tail = tail; 1448 } else { 1449 /* Head insertion */ 1450 tail->b_next = dsp->ds_tx_list_head; 1451 if (dsp->ds_tx_list_head == NULL) 1452 dsp->ds_tx_list_tail = tail; 1453 dsp->ds_tx_list_head = mp; 1454 } 1455 mutex_exit(&dsp->ds_tx_list_lock); 1456 done: 1457 /* Schedule service thread to drain the transmit queue */ 1458 qenable(q); 1459 } 1460 1461 void 1462 dld_tx_flush(dld_str_t *dsp) 1463 { 1464 mutex_enter(&dsp->ds_tx_list_lock); 1465 if (dsp->ds_tx_list_head != NULL) { 1466 freemsgchain(dsp->ds_tx_list_head); 1467 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 1468 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 1469 if (dsp->ds_tx_qbusy) { 1470 dsp->ds_tx_flow_mp = getq(dsp->ds_wq); 1471 ASSERT(dsp->ds_tx_flow_mp != NULL); 1472 dsp->ds_tx_qbusy = B_FALSE; 1473 } 1474 } 1475 mutex_exit(&dsp->ds_tx_list_lock); 1476 } 1477 1478 /* 1479 * Process an M_IOCTL message. 
1480 */ 1481 static void 1482 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1483 { 1484 uint_t cmd; 1485 1486 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1487 ASSERT(dsp->ds_type == DLD_DLPI); 1488 1489 switch (cmd) { 1490 case DLIOCRAW: 1491 ioc_raw(dsp, mp); 1492 break; 1493 case DLIOCHDRINFO: 1494 ioc_fast(dsp, mp); 1495 break; 1496 default: 1497 ioc(dsp, mp); 1498 } 1499 } 1500 1501 /* 1502 * DLIOCRAW 1503 */ 1504 static void 1505 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1506 { 1507 queue_t *q = dsp->ds_wq; 1508 1509 rw_enter(&dsp->ds_lock, RW_WRITER); 1510 if (dsp->ds_polling || dsp->ds_soft_ring) { 1511 rw_exit(&dsp->ds_lock); 1512 miocnak(q, mp, 0, EPROTO); 1513 return; 1514 } 1515 1516 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 1517 /* 1518 * Set the receive callback. 1519 */ 1520 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); 1521 1522 /* 1523 * Note that raw mode is enabled. 1524 */ 1525 dsp->ds_mode = DLD_RAW; 1526 } 1527 1528 rw_exit(&dsp->ds_lock); 1529 miocack(q, mp, 0, 0); 1530 } 1531 1532 /* 1533 * DLIOCHDRINFO 1534 */ 1535 static void 1536 ioc_fast(dld_str_t *dsp, mblk_t *mp) 1537 { 1538 dl_unitdata_req_t *dlp; 1539 off_t off; 1540 size_t len; 1541 const uint8_t *addr; 1542 uint16_t sap; 1543 mblk_t *nmp; 1544 mblk_t *hmp; 1545 uint_t addr_length; 1546 queue_t *q = dsp->ds_wq; 1547 int err; 1548 dls_channel_t dc; 1549 1550 if (dld_opt & DLD_OPT_NO_FASTPATH) { 1551 err = ENOTSUP; 1552 goto failed; 1553 } 1554 1555 nmp = mp->b_cont; 1556 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 1557 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 1558 dlp->dl_primitive != DL_UNITDATA_REQ)) { 1559 err = EINVAL; 1560 goto failed; 1561 } 1562 1563 off = dlp->dl_dest_addr_offset; 1564 len = dlp->dl_dest_addr_length; 1565 1566 if (!MBLKIN(nmp, off, len)) { 1567 err = EINVAL; 1568 goto failed; 1569 } 1570 1571 rw_enter(&dsp->ds_lock, RW_READER); 1572 if (dsp->ds_dlstate != DL_IDLE) { 1573 rw_exit(&dsp->ds_lock); 1574 err = ENOTSUP; 1575 goto 
failed; 1576 } 1577 1578 addr_length = dsp->ds_mip->mi_addr_length; 1579 if (len != addr_length + sizeof (uint16_t)) { 1580 rw_exit(&dsp->ds_lock); 1581 err = EINVAL; 1582 goto failed; 1583 } 1584 1585 addr = nmp->b_rptr + off; 1586 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 1587 dc = dsp->ds_dc; 1588 1589 if ((hmp = dls_header(dc, addr, sap, dsp->ds_pri)) == NULL) { 1590 rw_exit(&dsp->ds_lock); 1591 err = ENOMEM; 1592 goto failed; 1593 } 1594 1595 /* 1596 * This is a performance optimization. We originally entered 1597 * as reader and only become writer upon transitioning into 1598 * the DLD_FASTPATH mode for the first time. Otherwise we 1599 * stay as reader and return the fast-path header to IP. 1600 */ 1601 if (dsp->ds_mode != DLD_FASTPATH) { 1602 if (!rw_tryupgrade(&dsp->ds_lock)) { 1603 rw_exit(&dsp->ds_lock); 1604 rw_enter(&dsp->ds_lock, RW_WRITER); 1605 1606 /* 1607 * State may have changed before we re-acquired 1608 * the writer lock in case the upgrade failed. 1609 */ 1610 if (dsp->ds_dlstate != DL_IDLE) { 1611 rw_exit(&dsp->ds_lock); 1612 err = ENOTSUP; 1613 goto failed; 1614 } 1615 } 1616 1617 /* 1618 * Set the receive callback (unless polling is enabled). 1619 */ 1620 if (!dsp->ds_polling && !dsp->ds_soft_ring) 1621 dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); 1622 1623 /* 1624 * Note that fast-path mode is enabled. 1625 */ 1626 dsp->ds_mode = DLD_FASTPATH; 1627 } 1628 rw_exit(&dsp->ds_lock); 1629 1630 freemsg(nmp->b_cont); 1631 nmp->b_cont = hmp; 1632 1633 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 1634 return; 1635 failed: 1636 miocnak(q, mp, 0, err); 1637 } 1638 1639 /* 1640 * Catch-all handler. 
1641 */ 1642 static void 1643 ioc(dld_str_t *dsp, mblk_t *mp) 1644 { 1645 queue_t *q = dsp->ds_wq; 1646 mac_handle_t mh; 1647 1648 rw_enter(&dsp->ds_lock, RW_READER); 1649 if (dsp->ds_dlstate == DL_UNATTACHED) { 1650 rw_exit(&dsp->ds_lock); 1651 miocnak(q, mp, 0, EINVAL); 1652 return; 1653 } 1654 mh = dsp->ds_mh; 1655 ASSERT(mh != NULL); 1656 rw_exit(&dsp->ds_lock); 1657 mac_ioctl(mh, q, mp); 1658 } 1659 1660 /* 1661 * Allocate a new minor number. 1662 */ 1663 static minor_t 1664 dld_minor_hold(boolean_t sleep) 1665 { 1666 minor_t minor; 1667 1668 /* 1669 * Grab a value from the arena. 1670 */ 1671 atomic_add_32(&minor_count, 1); 1672 if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, 1673 (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) { 1674 atomic_add_32(&minor_count, -1); 1675 return (0); 1676 } 1677 1678 return (minor); 1679 } 1680 1681 /* 1682 * Release a previously allocated minor number. 1683 */ 1684 static void 1685 dld_minor_rele(minor_t minor) 1686 { 1687 /* 1688 * Return the value to the arena. 1689 */ 1690 vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); 1691 1692 atomic_add_32(&minor_count, -1); 1693 } 1694