1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Data-Link Driver 31 */ 32 33 #include <sys/stropts.h> 34 #include <sys/strsun.h> 35 #include <sys/strsubr.h> 36 #include <sys/atomic.h> 37 #include <sys/mkdev.h> 38 #include <sys/vlan.h> 39 #include <sys/dld.h> 40 #include <sys/dld_impl.h> 41 #include <sys/dls_impl.h> 42 #include <inet/common.h> 43 44 static int str_constructor(void *, void *, int); 45 static void str_destructor(void *, void *); 46 static mblk_t *str_unitdata_ind(dld_str_t *, mblk_t *); 47 static void str_notify_promisc_on_phys(dld_str_t *); 48 static void str_notify_promisc_off_phys(dld_str_t *); 49 static void str_notify_phys_addr(dld_str_t *, const uint8_t *); 50 static void str_notify_link_up(dld_str_t *); 51 static void str_notify_link_down(dld_str_t *); 52 static void str_notify_capab_reneg(dld_str_t *); 53 static void str_notify_speed(dld_str_t *, uint32_t); 54 static void str_notify(void *, mac_notify_type_t); 55 56 static void ioc_raw(dld_str_t *, mblk_t *); 57 
static void ioc_fast(dld_str_t *, mblk_t *); 58 static void ioc(dld_str_t *, mblk_t *); 59 static void dld_ioc(dld_str_t *, mblk_t *); 60 static minor_t dld_minor_hold(boolean_t); 61 static void dld_minor_rele(minor_t); 62 63 static uint32_t str_count; 64 static kmem_cache_t *str_cachep; 65 static vmem_t *minor_arenap; 66 static uint32_t minor_count; 67 68 #define MINOR_TO_PTR(minor) ((void *)(uintptr_t)(minor)) 69 #define PTR_TO_MINOR(ptr) ((minor_t)(uintptr_t)(ptr)) 70 71 /* 72 * Some notes on entry points, flow-control, queueing and locking: 73 * 74 * This driver exports the traditional STREAMS put entry point as well as 75 * the non-STREAMS fast-path transmit routine which is provided to IP via 76 * the DL_CAPAB_POLL negotiation. The put procedure handles all control 77 * and data operations, while the fast-path routine deals only with M_DATA 78 * fast-path packets. Regardless of the entry point, all outbound packets 79 * will end up in str_mdata_fastpath_put(), where they will be delivered to 80 * the MAC driver. 81 * 82 * The transmit logic operates in two modes: a "not busy" mode where the 83 * packets will be delivered to the MAC for a send attempt, or "busy" mode 84 * where they will be enqueued in the internal queue because of flow-control. 85 * Flow-control happens when the MAC driver indicates the packets couldn't 86 * be transmitted due to lack of resources (e.g. running out of descriptors). 87 * In such case, the driver will place a dummy message on its write-side 88 * STREAMS queue so that the queue is marked as "full". Any subsequent 89 * packets arriving at the driver will be enqueued in the internal queue, 90 * which is drained in the context of the service thread that gets scheduled 91 * whenever the driver is in the "busy" mode. 
When all packets have been
 * successfully delivered by MAC and the internal queue is empty, it will
 * transition to the "not busy" mode by removing the dummy message from the
 * write-side STREAMS queue; in effect this will trigger backenabling.
 * The sizes of q_hiwat and q_lowat are set to 1 and 0, respectively, due
 * to the above reasons.
 *
 * The driver implements an internal transmit queue independent of STREAMS.
 * This allows for flexibility and provides a fast enqueue/dequeue mechanism
 * compared to the putq() and getq() STREAMS interfaces.  The only putq() and
 * getq() operations done by the driver are those related to placing and
 * removing the dummy message to/from the write-side STREAMS queue for flow-
 * control purposes.
 *
 * Locking is done independent of STREAMS due to the driver being fully MT.
 * Threads entering the driver (either from put or service entry points)
 * will most likely be readers, with the exception of a few writer cases
 * such as those handling DLPI attach/detach/bind/unbind/etc. or any of the
 * DLD-related ioctl requests.  The DLPI detach case is special, because
 * it involves freeing resources and therefore must be single-threaded.
 * Unfortunately the readers/writers lock can't be used to protect against
 * it, because the lock is dropped prior to the driver calling places where
 * putnext() may be invoked, and such places may depend on those resources
 * to exist.  Because of this, the driver always completes the DLPI detach
 * process when there are no other threads running in the driver.  This is
 * done by keeping track of the number of threads, such that the last
 * thread leaving the driver will finish the pending DLPI detach operation.
118 */ 119 120 /* 121 * dld_max_q_count is the queue depth threshold used to limit the number of 122 * outstanding packets or bytes allowed in the queue; once this limit is 123 * reached the driver will free any incoming ones until the queue depth 124 * drops below the threshold. 125 * 126 * This buffering is provided to accomodate clients which do not employ 127 * their own buffering scheme, and to handle occasional packet bursts. 128 * Clients which handle their own buffering will receive positive feedback 129 * from this driver as soon as it transitions into the "busy" state, i.e. 130 * when the queue is initially filled up; they will get backenabled once 131 * the queue is empty. 132 * 133 * The value chosen here is rather arbitrary; in future some intelligent 134 * heuristics may be involved which could take into account the hardware's 135 * transmit ring size, etc. 136 */ 137 uint_t dld_max_q_count = (16 * 1024 *1024); 138 139 static dev_info_t * 140 dld_finddevinfo(dev_t dev) 141 { 142 minor_t minor = getminor(dev); 143 char *drvname = ddi_major_to_name(getmajor(dev)); 144 char name[MAXNAMELEN]; 145 dls_vlan_t *dvp = NULL; 146 dev_info_t *dip = NULL; 147 148 if (drvname == NULL || minor == 0 || minor > DLD_MAX_PPA + 1) 149 return (NULL); 150 151 (void) snprintf(name, MAXNAMELEN, "%s%d", drvname, (int)minor - 1); 152 if (dls_vlan_hold(name, &dvp, B_FALSE) != 0) 153 return (NULL); 154 155 dip = mac_devinfo_get(dvp->dv_dlp->dl_mh); 156 dls_vlan_rele(dvp); 157 return (dip); 158 } 159 160 /* 161 * devo_getinfo: getinfo(9e) 162 */ 163 /*ARGSUSED*/ 164 int 165 dld_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resp) 166 { 167 dev_info_t *devinfo; 168 minor_t minor = getminor((dev_t)arg); 169 int rc = DDI_FAILURE; 170 171 switch (cmd) { 172 case DDI_INFO_DEVT2DEVINFO: 173 if ((devinfo = dld_finddevinfo((dev_t)arg)) != NULL) { 174 *(dev_info_t **)resp = devinfo; 175 rc = DDI_SUCCESS; 176 } 177 break; 178 case DDI_INFO_DEVT2INSTANCE: 179 if (minor > 0 
&& minor <= DLD_MAX_PPA + 1) { 180 *(int *)resp = (int)minor - 1; 181 rc = DDI_SUCCESS; 182 } 183 break; 184 } 185 return (rc); 186 } 187 188 /* 189 * qi_qopen: open(9e) 190 */ 191 /*ARGSUSED*/ 192 int 193 dld_open(queue_t *rq, dev_t *devp, int flag, int sflag, cred_t *credp) 194 { 195 dld_str_t *dsp; 196 major_t major; 197 minor_t minor; 198 int err; 199 200 if (sflag == MODOPEN) 201 return (ENOTSUP); 202 203 /* 204 * This is a cloning driver and therefore each queue should only 205 * ever get opened once. 206 */ 207 if (rq->q_ptr != NULL) 208 return (EBUSY); 209 210 major = getmajor(*devp); 211 minor = getminor(*devp); 212 if (minor > DLD_MAX_MINOR) 213 return (ENODEV); 214 215 /* 216 * Create a new dld_str_t for the stream. This will grab a new minor 217 * number that will be handed back in the cloned dev_t. Creation may 218 * fail if we can't allocate the dummy mblk used for flow-control. 219 */ 220 dsp = dld_str_create(rq, DLD_DLPI, major, 221 ((minor == 0) ? DL_STYLE2 : DL_STYLE1)); 222 if (dsp == NULL) 223 return (ENOSR); 224 225 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 226 if (minor != 0) { 227 /* 228 * Style 1 open 229 */ 230 231 if ((err = dld_str_attach(dsp, (t_uscalar_t)minor - 1)) != 0) 232 goto failed; 233 ASSERT(dsp->ds_dlstate == DL_UNBOUND); 234 } else { 235 (void) qassociate(rq, -1); 236 } 237 238 /* 239 * Enable the queue srv(9e) routine. 240 */ 241 qprocson(rq); 242 243 /* 244 * Construct a cloned dev_t to hand back. 245 */ 246 *devp = makedevice(getmajor(*devp), dsp->ds_minor); 247 return (0); 248 249 failed: 250 dld_str_destroy(dsp); 251 return (err); 252 } 253 254 /* 255 * qi_qclose: close(9e) 256 */ 257 int 258 dld_close(queue_t *rq) 259 { 260 dld_str_t *dsp = rq->q_ptr; 261 262 /* 263 * Wait until pending requests are processed. 
264 */ 265 mutex_enter(&dsp->ds_thr_lock); 266 while (dsp->ds_pending_cnt > 0) 267 cv_wait(&dsp->ds_pending_cv, &dsp->ds_thr_lock); 268 mutex_exit(&dsp->ds_thr_lock); 269 270 /* 271 * Disable the queue srv(9e) routine. 272 */ 273 qprocsoff(rq); 274 275 /* 276 * At this point we can not be entered by any threads via STREAMS 277 * or the direct call interface, which is available only to IP. 278 * After the interface is unplumbed, IP wouldn't have any reference 279 * to this instance, and therefore we are now effectively single 280 * threaded and don't require any lock protection. Flush all 281 * pending packets which are sitting in the transmit queue. 282 */ 283 ASSERT(dsp->ds_thr == 0); 284 dld_tx_flush(dsp); 285 286 /* 287 * This stream was open to a provider node. Check to see 288 * if it has been cleanly shut down. 289 */ 290 if (dsp->ds_dlstate != DL_UNATTACHED) { 291 /* 292 * The stream is either open to a style 1 provider or 293 * this is not clean shutdown. Detach from the PPA. 294 * (This is still ok even in the style 1 case). 
295 */ 296 dld_str_detach(dsp); 297 } 298 299 dld_str_destroy(dsp); 300 return (0); 301 } 302 303 /* 304 * qi_qputp: put(9e) 305 */ 306 void 307 dld_wput(queue_t *wq, mblk_t *mp) 308 { 309 dld_str_t *dsp = (dld_str_t *)wq->q_ptr; 310 311 DLD_ENTER(dsp); 312 313 switch (DB_TYPE(mp)) { 314 case M_DATA: 315 rw_enter(&dsp->ds_lock, RW_READER); 316 if (dsp->ds_dlstate != DL_IDLE || 317 dsp->ds_mode == DLD_UNITDATA) { 318 freemsg(mp); 319 } else if (dsp->ds_mode == DLD_FASTPATH) { 320 str_mdata_fastpath_put(dsp, mp); 321 } else if (dsp->ds_mode == DLD_RAW) { 322 str_mdata_raw_put(dsp, mp); 323 } 324 rw_exit(&dsp->ds_lock); 325 break; 326 case M_PROTO: 327 case M_PCPROTO: 328 dld_proto(dsp, mp); 329 break; 330 case M_IOCTL: 331 dld_ioc(dsp, mp); 332 break; 333 case M_FLUSH: 334 if (*mp->b_rptr & FLUSHW) { 335 dld_tx_flush(dsp); 336 *mp->b_rptr &= ~FLUSHW; 337 } 338 339 if (*mp->b_rptr & FLUSHR) { 340 qreply(wq, mp); 341 } else { 342 freemsg(mp); 343 } 344 break; 345 default: 346 freemsg(mp); 347 break; 348 } 349 350 DLD_EXIT(dsp); 351 } 352 353 /* 354 * qi_srvp: srv(9e) 355 */ 356 void 357 dld_wsrv(queue_t *wq) 358 { 359 mblk_t *mp; 360 dld_str_t *dsp = wq->q_ptr; 361 362 DLD_ENTER(dsp); 363 rw_enter(&dsp->ds_lock, RW_READER); 364 /* 365 * Grab all packets (chained via b_next) off our transmit queue 366 * and try to send them all to the MAC layer. Since the queue 367 * is independent of streams, we are able to dequeue all messages 368 * at once without looping through getq() and manually chaining 369 * them. Note that the queue size parameters (byte and message 370 * counts) are cleared as well, but we postpone the backenabling 371 * until after the MAC transmit since some packets may end up 372 * back at our transmit queue. 
373 */ 374 mutex_enter(&dsp->ds_tx_list_lock); 375 if ((mp = dsp->ds_tx_list_head) == NULL) { 376 ASSERT(!dsp->ds_tx_qbusy); 377 ASSERT(dsp->ds_tx_flow_mp != NULL); 378 ASSERT(dsp->ds_tx_list_head == NULL); 379 ASSERT(dsp->ds_tx_list_tail == NULL); 380 ASSERT(dsp->ds_tx_cnt == 0); 381 ASSERT(dsp->ds_tx_msgcnt == 0); 382 mutex_exit(&dsp->ds_tx_list_lock); 383 goto done; 384 } 385 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 386 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 387 mutex_exit(&dsp->ds_tx_list_lock); 388 389 /* 390 * Discard packets unless we are attached and bound; note that 391 * the driver mode (fastpath/raw/unitdata) is irrelevant here, 392 * because regardless of the mode all transmit will end up in 393 * str_mdata_fastpath_put() where the packets may be queued. 394 */ 395 ASSERT(DB_TYPE(mp) == M_DATA); 396 if (dsp->ds_dlstate != DL_IDLE) { 397 freemsgchain(mp); 398 goto done; 399 } 400 401 /* 402 * Attempt to transmit one or more packets. If the MAC can't 403 * send them all, re-queue the packet(s) at the beginning of 404 * the transmit queue to avoid any re-ordering. 405 */ 406 if ((mp = dls_tx(dsp->ds_dc, mp)) != NULL) 407 dld_tx_enqueue(dsp, mp, B_TRUE); 408 409 /* 410 * Grab the list lock again and check if the transmit queue is 411 * really empty; if so, lift up flow-control and backenable any 412 * writer queues. If the queue is not empty, schedule service 413 * thread to drain it. 
414 */ 415 mutex_enter(&dsp->ds_tx_list_lock); 416 if (dsp->ds_tx_list_head == NULL) { 417 dsp->ds_tx_flow_mp = getq(wq); 418 ASSERT(dsp->ds_tx_flow_mp != NULL); 419 dsp->ds_tx_qbusy = B_FALSE; 420 } 421 mutex_exit(&dsp->ds_tx_list_lock); 422 done: 423 rw_exit(&dsp->ds_lock); 424 DLD_EXIT(dsp); 425 } 426 427 void 428 dld_init_ops(struct dev_ops *ops, const char *name) 429 { 430 struct streamtab *stream; 431 struct qinit *rq, *wq; 432 struct module_info *modinfo; 433 434 modinfo = kmem_zalloc(sizeof (struct module_info), KM_SLEEP); 435 modinfo->mi_idname = kmem_zalloc(FMNAMESZ, KM_SLEEP); 436 (void) snprintf(modinfo->mi_idname, FMNAMESZ, "%s", name); 437 modinfo->mi_minpsz = 0; 438 modinfo->mi_maxpsz = 64*1024; 439 modinfo->mi_hiwat = 1; 440 modinfo->mi_lowat = 0; 441 442 rq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 443 rq->qi_qopen = dld_open; 444 rq->qi_qclose = dld_close; 445 rq->qi_minfo = modinfo; 446 447 wq = kmem_zalloc(sizeof (struct qinit), KM_SLEEP); 448 wq->qi_putp = (pfi_t)dld_wput; 449 wq->qi_srvp = (pfi_t)dld_wsrv; 450 wq->qi_minfo = modinfo; 451 452 stream = kmem_zalloc(sizeof (struct streamtab), KM_SLEEP); 453 stream->st_rdinit = rq; 454 stream->st_wrinit = wq; 455 ops->devo_cb_ops->cb_str = stream; 456 457 ops->devo_getinfo = &dld_getinfo; 458 } 459 460 void 461 dld_fini_ops(struct dev_ops *ops) 462 { 463 struct streamtab *stream; 464 struct qinit *rq, *wq; 465 struct module_info *modinfo; 466 467 stream = ops->devo_cb_ops->cb_str; 468 rq = stream->st_rdinit; 469 wq = stream->st_wrinit; 470 modinfo = rq->qi_minfo; 471 ASSERT(wq->qi_minfo == modinfo); 472 473 kmem_free(stream, sizeof (struct streamtab)); 474 kmem_free(wq, sizeof (struct qinit)); 475 kmem_free(rq, sizeof (struct qinit)); 476 kmem_free(modinfo->mi_idname, FMNAMESZ); 477 kmem_free(modinfo, sizeof (struct module_info)); 478 } 479 480 /* 481 * Initialize this module's data structures. 482 */ 483 void 484 dld_str_init(void) 485 { 486 /* 487 * Create dld_str_t object cache. 
488 */ 489 str_cachep = kmem_cache_create("dld_str_cache", sizeof (dld_str_t), 490 0, str_constructor, str_destructor, NULL, NULL, NULL, 0); 491 ASSERT(str_cachep != NULL); 492 493 /* 494 * Allocate a vmem arena to manage minor numbers. The range of the 495 * arena will be from DLD_MAX_MINOR + 1 to MAXMIN (maximum legal 496 * minor number). 497 */ 498 minor_arenap = vmem_create("dld_minor_arena", 499 MINOR_TO_PTR(DLD_MAX_MINOR + 1), MAXMIN, 1, NULL, NULL, NULL, 0, 500 VM_SLEEP | VMC_IDENTIFIER); 501 ASSERT(minor_arenap != NULL); 502 } 503 504 /* 505 * Tear down this module's data structures. 506 */ 507 int 508 dld_str_fini(void) 509 { 510 /* 511 * Make sure that there are no objects in use. 512 */ 513 if (str_count != 0) 514 return (EBUSY); 515 516 /* 517 * Check to see if there are any minor numbers still in use. 518 */ 519 if (minor_count != 0) 520 return (EBUSY); 521 522 /* 523 * Destroy object cache. 524 */ 525 kmem_cache_destroy(str_cachep); 526 vmem_destroy(minor_arenap); 527 return (0); 528 } 529 530 /* 531 * Create a new dld_str_t object. 532 */ 533 dld_str_t * 534 dld_str_create(queue_t *rq, uint_t type, major_t major, t_uscalar_t style) 535 { 536 dld_str_t *dsp; 537 538 /* 539 * Allocate an object from the cache. 540 */ 541 atomic_add_32(&str_count, 1); 542 dsp = kmem_cache_alloc(str_cachep, KM_SLEEP); 543 544 /* 545 * Allocate the dummy mblk for flow-control. 546 */ 547 dsp->ds_tx_flow_mp = allocb(1, BPRI_HI); 548 if (dsp->ds_tx_flow_mp == NULL) { 549 kmem_cache_free(str_cachep, dsp); 550 atomic_add_32(&str_count, -1); 551 return (NULL); 552 } 553 dsp->ds_type = type; 554 dsp->ds_major = major; 555 dsp->ds_style = style; 556 557 /* 558 * Initialize the queue pointers. 559 */ 560 ASSERT(RD(rq) == rq); 561 dsp->ds_rq = rq; 562 dsp->ds_wq = WR(rq); 563 rq->q_ptr = WR(rq)->q_ptr = (void *)dsp; 564 565 /* 566 * We want explicit control over our write-side STREAMS queue 567 * where the dummy mblk gets added/removed for flow-control. 
568 */ 569 noenable(WR(rq)); 570 571 return (dsp); 572 } 573 574 /* 575 * Destroy a dld_str_t object. 576 */ 577 void 578 dld_str_destroy(dld_str_t *dsp) 579 { 580 queue_t *rq; 581 queue_t *wq; 582 583 /* 584 * Clear the queue pointers. 585 */ 586 rq = dsp->ds_rq; 587 wq = dsp->ds_wq; 588 ASSERT(wq == WR(rq)); 589 590 rq->q_ptr = wq->q_ptr = NULL; 591 dsp->ds_rq = dsp->ds_wq = NULL; 592 593 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 594 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 595 ASSERT(dsp->ds_tx_list_head == NULL); 596 ASSERT(dsp->ds_tx_list_tail == NULL); 597 ASSERT(dsp->ds_tx_cnt == 0); 598 ASSERT(dsp->ds_tx_msgcnt == 0); 599 ASSERT(!dsp->ds_tx_qbusy); 600 601 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 602 ASSERT(dsp->ds_thr == 0); 603 ASSERT(dsp->ds_pending_req == NULL); 604 605 /* 606 * Reinitialize all the flags. 607 */ 608 dsp->ds_notifications = 0; 609 dsp->ds_passivestate = DLD_UNINITIALIZED; 610 dsp->ds_mode = DLD_UNITDATA; 611 612 /* 613 * Free the dummy mblk if exists. 614 */ 615 if (dsp->ds_tx_flow_mp != NULL) { 616 freeb(dsp->ds_tx_flow_mp); 617 dsp->ds_tx_flow_mp = NULL; 618 } 619 /* 620 * Free the object back to the cache. 621 */ 622 kmem_cache_free(str_cachep, dsp); 623 atomic_add_32(&str_count, -1); 624 } 625 626 /* 627 * kmem_cache contructor function: see kmem_cache_create(9f). 628 */ 629 /*ARGSUSED*/ 630 static int 631 str_constructor(void *buf, void *cdrarg, int kmflags) 632 { 633 dld_str_t *dsp = buf; 634 635 bzero(buf, sizeof (dld_str_t)); 636 637 /* 638 * Allocate a new minor number. 639 */ 640 if ((dsp->ds_minor = dld_minor_hold(kmflags == KM_SLEEP)) == 0) 641 return (-1); 642 643 /* 644 * Initialize the DLPI state machine. 
645 */ 646 dsp->ds_dlstate = DL_UNATTACHED; 647 648 mutex_init(&dsp->ds_thr_lock, NULL, MUTEX_DRIVER, NULL); 649 rw_init(&dsp->ds_lock, NULL, RW_DRIVER, NULL); 650 mutex_init(&dsp->ds_tx_list_lock, NULL, MUTEX_DRIVER, NULL); 651 cv_init(&dsp->ds_pending_cv, NULL, CV_DRIVER, NULL); 652 653 return (0); 654 } 655 656 /* 657 * kmem_cache destructor function. 658 */ 659 /*ARGSUSED*/ 660 static void 661 str_destructor(void *buf, void *cdrarg) 662 { 663 dld_str_t *dsp = buf; 664 665 /* 666 * Make sure the DLPI state machine was reset. 667 */ 668 ASSERT(dsp->ds_dlstate == DL_UNATTACHED); 669 670 /* 671 * Make sure the data-link interface was closed. 672 */ 673 ASSERT(dsp->ds_mh == NULL); 674 ASSERT(dsp->ds_dc == NULL); 675 676 /* 677 * Make sure enabled notifications are cleared. 678 */ 679 ASSERT(dsp->ds_notifications == 0); 680 681 /* 682 * Make sure polling is disabled. 683 */ 684 ASSERT(!dsp->ds_polling); 685 686 /* 687 * Release the minor number. 688 */ 689 dld_minor_rele(dsp->ds_minor); 690 691 ASSERT(!RW_LOCK_HELD(&dsp->ds_lock)); 692 rw_destroy(&dsp->ds_lock); 693 694 ASSERT(MUTEX_NOT_HELD(&dsp->ds_tx_list_lock)); 695 mutex_destroy(&dsp->ds_tx_list_lock); 696 ASSERT(dsp->ds_tx_flow_mp == NULL); 697 698 ASSERT(MUTEX_NOT_HELD(&dsp->ds_thr_lock)); 699 mutex_destroy(&dsp->ds_thr_lock); 700 ASSERT(dsp->ds_pending_req == NULL); 701 ASSERT(dsp->ds_pending_op == NULL); 702 ASSERT(dsp->ds_pending_cnt == 0); 703 cv_destroy(&dsp->ds_pending_cv); 704 } 705 706 /* 707 * M_DATA put (IP fast-path mode) 708 */ 709 void 710 str_mdata_fastpath_put(dld_str_t *dsp, mblk_t *mp) 711 { 712 /* 713 * This function can be called from within dld or from an upper 714 * layer protocol (currently only tcp). If we are in the busy 715 * mode enqueue the packet(s) and return. Otherwise hand them 716 * over to the MAC driver for transmission; any remaining one(s) 717 * which didn't get sent will be queued. 718 * 719 * Note here that we don't grab the list lock prior to checking 720 * the busy flag. 
This is okay, because a missed transition 721 * will not cause any packet reordering for any particular TCP 722 * connection (which is single-threaded). The enqueue routine 723 * will atomically set the busy flag and schedule the service 724 * thread to run; the flag is only cleared by the service thread 725 * when there is no more packet to be transmitted. 726 */ 727 if (dsp->ds_tx_qbusy || (mp = dls_tx(dsp->ds_dc, mp)) != NULL) 728 dld_tx_enqueue(dsp, mp, B_FALSE); 729 } 730 731 /* 732 * M_DATA put (raw mode) 733 */ 734 void 735 str_mdata_raw_put(dld_str_t *dsp, mblk_t *mp) 736 { 737 struct ether_header *ehp; 738 mblk_t *bp; 739 size_t size; 740 size_t hdrlen; 741 742 size = MBLKL(mp); 743 if (size < sizeof (struct ether_header)) 744 goto discard; 745 746 hdrlen = sizeof (struct ether_header); 747 748 ehp = (struct ether_header *)mp->b_rptr; 749 if (ntohs(ehp->ether_type) == VLAN_TPID) { 750 struct ether_vlan_header *evhp; 751 752 if (size < sizeof (struct ether_vlan_header)) 753 goto discard; 754 755 /* 756 * Replace vtag with our own 757 */ 758 evhp = (struct ether_vlan_header *)ehp; 759 evhp->ether_tci = htons(VLAN_TCI(dsp->ds_pri, 760 ETHER_CFI, dsp->ds_vid)); 761 hdrlen = sizeof (struct ether_vlan_header); 762 } 763 764 /* 765 * Check the packet is not too big and that any remaining 766 * fragment list is composed entirely of M_DATA messages. (We 767 * know the first fragment was M_DATA otherwise we could not 768 * have got here). 769 */ 770 for (bp = mp->b_cont; bp != NULL; bp = bp->b_cont) { 771 if (DB_TYPE(bp) != M_DATA) 772 goto discard; 773 size += MBLKL(bp); 774 } 775 776 if (size > dsp->ds_mip->mi_sdu_max + hdrlen) 777 goto discard; 778 779 str_mdata_fastpath_put(dsp, mp); 780 return; 781 782 discard: 783 freemsg(mp); 784 } 785 786 /* 787 * Process DL_ATTACH_REQ (style 2) or open(2) (style 1). 
788 */ 789 int 790 dld_str_attach(dld_str_t *dsp, t_uscalar_t ppa) 791 { 792 int err; 793 const char *drvname; 794 char name[MAXNAMELEN]; 795 dls_channel_t dc; 796 uint_t addr_length; 797 798 ASSERT(dsp->ds_dc == NULL); 799 800 if ((drvname = ddi_major_to_name(dsp->ds_major)) == NULL) 801 return (EINVAL); 802 803 (void) snprintf(name, MAXNAMELEN, "%s%u", drvname, ppa); 804 805 if (strcmp(drvname, "aggr") != 0 && 806 qassociate(dsp->ds_wq, DLS_PPA2INST(ppa)) != 0) 807 return (EINVAL); 808 809 /* 810 * Open a channel. 811 */ 812 if ((err = dls_open(name, &dc)) != 0) { 813 (void) qassociate(dsp->ds_wq, -1); 814 return (err); 815 } 816 817 /* 818 * Cache the MAC interface handle, a pointer to the immutable MAC 819 * information and the current and 'factory' MAC address. 820 */ 821 dsp->ds_mh = dls_mac(dc); 822 dsp->ds_mip = mac_info(dsp->ds_mh); 823 824 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 825 826 addr_length = dsp->ds_mip->mi_addr_length; 827 bcopy(dsp->ds_mip->mi_unicst_addr, dsp->ds_fact_addr, addr_length); 828 829 /* 830 * Cache the interface VLAN identifier. (This will be VLAN_ID_NONE for 831 * a non-VLAN interface). 832 */ 833 dsp->ds_vid = dls_vid(dc); 834 835 /* 836 * Set the default packet priority. 837 */ 838 dsp->ds_pri = 0; 839 840 /* 841 * Add a notify function so that the we get updates from the MAC. 842 */ 843 dsp->ds_mnh = mac_notify_add(dsp->ds_mh, str_notify, (void *)dsp); 844 845 dsp->ds_dc = dc; 846 dsp->ds_dlstate = DL_UNBOUND; 847 848 return (0); 849 } 850 851 /* 852 * Process DL_DETACH_REQ (style 2) or close(2) (style 1). Can also be called 853 * from close(2) for style 2. 854 */ 855 void 856 dld_str_detach(dld_str_t *dsp) 857 { 858 ASSERT(dsp->ds_thr == 0); 859 860 /* 861 * Remove the notify function. 862 */ 863 mac_notify_remove(dsp->ds_mh, dsp->ds_mnh); 864 865 /* 866 * Clear the polling and promisc flags. 
867 */ 868 dsp->ds_polling = B_FALSE; 869 dsp->ds_soft_ring = B_FALSE; 870 dsp->ds_promisc = 0; 871 872 /* 873 * Close the channel. 874 */ 875 dls_close(dsp->ds_dc); 876 dsp->ds_dc = NULL; 877 dsp->ds_mh = NULL; 878 879 (void) qassociate(dsp->ds_wq, -1); 880 881 /* 882 * Re-initialize the DLPI state machine. 883 */ 884 dsp->ds_dlstate = DL_UNATTACHED; 885 886 } 887 888 /* 889 * Raw mode receive function. 890 */ 891 /*ARGSUSED*/ 892 void 893 dld_str_rx_raw(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 894 size_t header_length) 895 { 896 dld_str_t *dsp = (dld_str_t *)arg; 897 mblk_t *next; 898 899 ASSERT(mp != NULL); 900 do { 901 /* 902 * Get the pointer to the next packet in the chain and then 903 * clear b_next before the packet gets passed on. 904 */ 905 next = mp->b_next; 906 mp->b_next = NULL; 907 908 /* 909 * Wind back b_rptr to point at the MAC header. 910 */ 911 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 912 mp->b_rptr -= header_length; 913 if (header_length == sizeof (struct ether_vlan_header)) { 914 /* 915 * Strip off the vtag 916 */ 917 ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 918 2 * ETHERADDRL); 919 mp->b_rptr += VLAN_TAGSZ; 920 } 921 922 /* 923 * Pass the packet on. 924 */ 925 putnext(dsp->ds_rq, mp); 926 927 /* 928 * Move on to the next packet in the chain. 929 */ 930 mp = next; 931 } while (mp != NULL); 932 } 933 934 /* 935 * Fast-path receive function. 936 */ 937 /*ARGSUSED*/ 938 void 939 dld_str_rx_fastpath(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 940 size_t header_length) 941 { 942 dld_str_t *dsp = (dld_str_t *)arg; 943 mblk_t *next; 944 945 ASSERT(mp != NULL); 946 do { 947 /* 948 * Get the pointer to the next packet in the chain and then 949 * clear b_next before the packet gets passed on. 950 */ 951 next = mp->b_next; 952 mp->b_next = NULL; 953 954 /* 955 * Pass the packet on. 956 */ 957 putnext(dsp->ds_rq, mp); 958 959 /* 960 * Move on to the next packet in the chain. 
961 */ 962 mp = next; 963 } while (mp != NULL); 964 } 965 966 /* 967 * Default receive function (send DL_UNITDATA_IND messages). 968 */ 969 /*ARGSUSED*/ 970 void 971 dld_str_rx_unitdata(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 972 size_t header_length) 973 { 974 dld_str_t *dsp = (dld_str_t *)arg; 975 mblk_t *ud_mp; 976 mblk_t *next; 977 978 ASSERT(mp != NULL); 979 do { 980 /* 981 * Get the pointer to the next packet in the chain and then 982 * clear b_next before the packet gets passed on. 983 */ 984 next = mp->b_next; 985 mp->b_next = NULL; 986 987 /* 988 * Wind back b_rptr to point at the MAC header. 989 */ 990 ASSERT(mp->b_rptr >= DB_BASE(mp) + header_length); 991 mp->b_rptr -= header_length; 992 993 /* 994 * Create the DL_UNITDATA_IND M_PROTO. 995 */ 996 if ((ud_mp = str_unitdata_ind(dsp, mp)) == NULL) { 997 freemsgchain(mp); 998 return; 999 } 1000 1001 /* 1002 * Advance b_rptr to point at the payload again. 1003 */ 1004 mp->b_rptr += header_length; 1005 1006 /* 1007 * Prepend the DL_UNITDATA_IND. 1008 */ 1009 ud_mp->b_cont = mp; 1010 1011 /* 1012 * Send the message. 1013 */ 1014 putnext(dsp->ds_rq, ud_mp); 1015 1016 /* 1017 * Move on to the next packet in the chain. 1018 */ 1019 mp = next; 1020 } while (mp != NULL); 1021 } 1022 1023 /* 1024 * Generate DL_NOTIFY_IND messages to notify the DLPI consumer of the 1025 * current state of the interface. 1026 */ 1027 void 1028 dld_str_notify_ind(dld_str_t *dsp) 1029 { 1030 mac_notify_type_t type; 1031 1032 for (type = 0; type < MAC_NNOTE; type++) 1033 str_notify(dsp, type); 1034 } 1035 1036 typedef struct dl_unitdata_ind_wrapper { 1037 dl_unitdata_ind_t dl_unitdata; 1038 uint8_t dl_dest_addr[MAXADDRLEN + sizeof (uint16_t)]; 1039 uint8_t dl_src_addr[MAXADDRLEN + sizeof (uint16_t)]; 1040 } dl_unitdata_ind_wrapper_t; 1041 1042 /* 1043 * Create a DL_UNITDATA_IND M_PROTO message. 
1044 */ 1045 static mblk_t * 1046 str_unitdata_ind(dld_str_t *dsp, mblk_t *mp) 1047 { 1048 mblk_t *nmp; 1049 dl_unitdata_ind_wrapper_t *dlwp; 1050 dl_unitdata_ind_t *dlp; 1051 dls_header_info_t dhi; 1052 uint_t addr_length; 1053 uint8_t *daddr; 1054 uint8_t *saddr; 1055 1056 /* 1057 * Get the packet header information. 1058 */ 1059 dls_header_info(dsp->ds_dc, mp, &dhi); 1060 1061 /* 1062 * Allocate a message large enough to contain the wrapper structure 1063 * defined above. 1064 */ 1065 if ((nmp = mexchange(dsp->ds_wq, NULL, 1066 sizeof (dl_unitdata_ind_wrapper_t), M_PROTO, 1067 DL_UNITDATA_IND)) == NULL) 1068 return (NULL); 1069 1070 dlwp = (dl_unitdata_ind_wrapper_t *)nmp->b_rptr; 1071 1072 dlp = &(dlwp->dl_unitdata); 1073 ASSERT(dlp == (dl_unitdata_ind_t *)nmp->b_rptr); 1074 ASSERT(dlp->dl_primitive == DL_UNITDATA_IND); 1075 1076 /* 1077 * Copy in the destination address. 1078 */ 1079 addr_length = dsp->ds_mip->mi_addr_length; 1080 daddr = dlwp->dl_dest_addr; 1081 dlp->dl_dest_addr_offset = (uintptr_t)daddr - (uintptr_t)dlp; 1082 bcopy(dhi.dhi_daddr, daddr, addr_length); 1083 1084 /* 1085 * Set the destination DLSAP to our bound DLSAP value. 1086 */ 1087 *(uint16_t *)(daddr + addr_length) = dsp->ds_sap; 1088 dlp->dl_dest_addr_length = addr_length + sizeof (uint16_t); 1089 1090 /* 1091 * If the destination address was a group address then 1092 * dl_group_address field should be non-zero. 1093 */ 1094 dlp->dl_group_address = dhi.dhi_isgroup; 1095 1096 /* 1097 * Copy in the source address. 1098 */ 1099 saddr = dlwp->dl_src_addr; 1100 dlp->dl_src_addr_offset = (uintptr_t)saddr - (uintptr_t)dlp; 1101 bcopy(dhi.dhi_saddr, saddr, addr_length); 1102 1103 /* 1104 * Set the source DLSAP to the packet ethertype. 
1105 */ 1106 *(uint16_t *)(saddr + addr_length) = dhi.dhi_ethertype; 1107 dlp->dl_src_addr_length = addr_length + sizeof (uint16_t); 1108 1109 return (nmp); 1110 } 1111 1112 /* 1113 * DL_NOTIFY_IND: DL_NOTE_PROMISC_ON_PHYS 1114 */ 1115 static void 1116 str_notify_promisc_on_phys(dld_str_t *dsp) 1117 { 1118 mblk_t *mp; 1119 dl_notify_ind_t *dlip; 1120 1121 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_ON_PHYS)) 1122 return; 1123 1124 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1125 M_PROTO, 0)) == NULL) 1126 return; 1127 1128 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1129 dlip = (dl_notify_ind_t *)mp->b_rptr; 1130 dlip->dl_primitive = DL_NOTIFY_IND; 1131 dlip->dl_notification = DL_NOTE_PROMISC_ON_PHYS; 1132 1133 qreply(dsp->ds_wq, mp); 1134 } 1135 1136 /* 1137 * DL_NOTIFY_IND: DL_NOTE_PROMISC_OFF_PHYS 1138 */ 1139 static void 1140 str_notify_promisc_off_phys(dld_str_t *dsp) 1141 { 1142 mblk_t *mp; 1143 dl_notify_ind_t *dlip; 1144 1145 if (!(dsp->ds_notifications & DL_NOTE_PROMISC_OFF_PHYS)) 1146 return; 1147 1148 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1149 M_PROTO, 0)) == NULL) 1150 return; 1151 1152 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1153 dlip = (dl_notify_ind_t *)mp->b_rptr; 1154 dlip->dl_primitive = DL_NOTIFY_IND; 1155 dlip->dl_notification = DL_NOTE_PROMISC_OFF_PHYS; 1156 1157 qreply(dsp->ds_wq, mp); 1158 } 1159 1160 /* 1161 * DL_NOTIFY_IND: DL_NOTE_PHYS_ADDR 1162 */ 1163 static void 1164 str_notify_phys_addr(dld_str_t *dsp, const uint8_t *addr) 1165 { 1166 mblk_t *mp; 1167 dl_notify_ind_t *dlip; 1168 uint_t addr_length; 1169 uint16_t ethertype; 1170 1171 if (!(dsp->ds_notifications & DL_NOTE_PHYS_ADDR)) 1172 return; 1173 1174 addr_length = dsp->ds_mip->mi_addr_length; 1175 if ((mp = mexchange(dsp->ds_wq, NULL, 1176 sizeof (dl_notify_ind_t) + addr_length + sizeof (uint16_t), 1177 M_PROTO, 0)) == NULL) 1178 return; 1179 1180 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1181 dlip = (dl_notify_ind_t 
*)mp->b_rptr; 1182 dlip->dl_primitive = DL_NOTIFY_IND; 1183 dlip->dl_notification = DL_NOTE_PHYS_ADDR; 1184 dlip->dl_data = DL_CURR_PHYS_ADDR; 1185 dlip->dl_addr_offset = sizeof (dl_notify_ind_t); 1186 dlip->dl_addr_length = addr_length + sizeof (uint16_t); 1187 1188 bcopy(addr, &dlip[1], addr_length); 1189 1190 ethertype = (dsp->ds_sap < ETHERTYPE_802_MIN) ? 0 : dsp->ds_sap; 1191 *(uint16_t *)((uchar_t *)(dlip + 1) + addr_length) = 1192 ethertype; 1193 1194 qreply(dsp->ds_wq, mp); 1195 } 1196 1197 /* 1198 * DL_NOTIFY_IND: DL_NOTE_LINK_UP 1199 */ 1200 static void 1201 str_notify_link_up(dld_str_t *dsp) 1202 { 1203 mblk_t *mp; 1204 dl_notify_ind_t *dlip; 1205 1206 if (!(dsp->ds_notifications & DL_NOTE_LINK_UP)) 1207 return; 1208 1209 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1210 M_PROTO, 0)) == NULL) 1211 return; 1212 1213 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1214 dlip = (dl_notify_ind_t *)mp->b_rptr; 1215 dlip->dl_primitive = DL_NOTIFY_IND; 1216 dlip->dl_notification = DL_NOTE_LINK_UP; 1217 1218 qreply(dsp->ds_wq, mp); 1219 } 1220 1221 /* 1222 * DL_NOTIFY_IND: DL_NOTE_LINK_DOWN 1223 */ 1224 static void 1225 str_notify_link_down(dld_str_t *dsp) 1226 { 1227 mblk_t *mp; 1228 dl_notify_ind_t *dlip; 1229 1230 if (!(dsp->ds_notifications & DL_NOTE_LINK_DOWN)) 1231 return; 1232 1233 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1234 M_PROTO, 0)) == NULL) 1235 return; 1236 1237 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1238 dlip = (dl_notify_ind_t *)mp->b_rptr; 1239 dlip->dl_primitive = DL_NOTIFY_IND; 1240 dlip->dl_notification = DL_NOTE_LINK_DOWN; 1241 1242 qreply(dsp->ds_wq, mp); 1243 } 1244 1245 /* 1246 * DL_NOTIFY_IND: DL_NOTE_SPEED 1247 */ 1248 static void 1249 str_notify_speed(dld_str_t *dsp, uint32_t speed) 1250 { 1251 mblk_t *mp; 1252 dl_notify_ind_t *dlip; 1253 1254 if (!(dsp->ds_notifications & DL_NOTE_SPEED)) 1255 return; 1256 1257 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1258 M_PROTO, 
0)) == NULL) 1259 return; 1260 1261 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1262 dlip = (dl_notify_ind_t *)mp->b_rptr; 1263 dlip->dl_primitive = DL_NOTIFY_IND; 1264 dlip->dl_notification = DL_NOTE_SPEED; 1265 dlip->dl_data = speed; 1266 1267 qreply(dsp->ds_wq, mp); 1268 } 1269 1270 /* 1271 * DL_NOTIFY_IND: DL_NOTE_CAPAB_RENEG 1272 */ 1273 static void 1274 str_notify_capab_reneg(dld_str_t *dsp) 1275 { 1276 mblk_t *mp; 1277 dl_notify_ind_t *dlip; 1278 1279 if (!(dsp->ds_notifications & DL_NOTE_CAPAB_RENEG)) 1280 return; 1281 1282 if ((mp = mexchange(dsp->ds_wq, NULL, sizeof (dl_notify_ind_t), 1283 M_PROTO, 0)) == NULL) 1284 return; 1285 1286 bzero(mp->b_rptr, sizeof (dl_notify_ind_t)); 1287 dlip = (dl_notify_ind_t *)mp->b_rptr; 1288 dlip->dl_primitive = DL_NOTIFY_IND; 1289 dlip->dl_notification = DL_NOTE_CAPAB_RENEG; 1290 1291 qreply(dsp->ds_wq, mp); 1292 } 1293 1294 /* 1295 * MAC notification callback. 1296 */ 1297 static void 1298 str_notify(void *arg, mac_notify_type_t type) 1299 { 1300 dld_str_t *dsp = (dld_str_t *)arg; 1301 queue_t *q = dsp->ds_wq; 1302 1303 switch (type) { 1304 case MAC_NOTE_TX: 1305 qenable(q); 1306 break; 1307 1308 case MAC_NOTE_DEVPROMISC: 1309 /* 1310 * Send the appropriate DL_NOTIFY_IND. 1311 */ 1312 if (mac_promisc_get(dsp->ds_mh, MAC_DEVPROMISC)) 1313 str_notify_promisc_on_phys(dsp); 1314 else 1315 str_notify_promisc_off_phys(dsp); 1316 break; 1317 1318 case MAC_NOTE_PROMISC: 1319 break; 1320 1321 case MAC_NOTE_UNICST: 1322 /* 1323 * This notification is sent whenever the MAC unicast address 1324 * changes. We need to re-cache the address. 1325 */ 1326 mac_unicst_get(dsp->ds_mh, dsp->ds_curr_addr); 1327 1328 /* 1329 * Send the appropriate DL_NOTIFY_IND. 1330 */ 1331 str_notify_phys_addr(dsp, dsp->ds_curr_addr); 1332 break; 1333 1334 case MAC_NOTE_LINK: 1335 /* 1336 * This notification is sent every time the MAC driver 1337 * updates the link state. 
1338 */ 1339 switch (mac_link_get(dsp->ds_mh)) { 1340 case LINK_STATE_UP: 1341 /* 1342 * The link is up so send the appropriate 1343 * DL_NOTIFY_IND. 1344 */ 1345 str_notify_link_up(dsp); 1346 1347 /* 1348 * If we can find the link speed then send a 1349 * DL_NOTIFY_IND for that too. 1350 */ 1351 if (dsp->ds_mip->mi_stat[MAC_STAT_IFSPEED]) { 1352 uint64_t val; 1353 1354 val = mac_stat_get(dsp->ds_mh, 1355 MAC_STAT_IFSPEED); 1356 str_notify_speed(dsp, 1357 (uint32_t)(val / 1000ull)); 1358 } 1359 break; 1360 1361 case LINK_STATE_DOWN: 1362 /* 1363 * The link is down so send the appropriate 1364 * DL_NOTIFY_IND. 1365 */ 1366 str_notify_link_down(dsp); 1367 break; 1368 1369 default: 1370 break; 1371 } 1372 break; 1373 1374 case MAC_NOTE_RESOURCE: 1375 /* 1376 * This notification is sent whenever the MAC resources 1377 * change. We need to renegotiate the capabilities. 1378 * Send the appropriate DL_NOTIFY_IND. 1379 */ 1380 str_notify_capab_reneg(dsp); 1381 break; 1382 1383 default: 1384 ASSERT(B_FALSE); 1385 break; 1386 } 1387 } 1388 1389 /* 1390 * Enqueue one or more messages to the transmit queue. 1391 * Caller specifies the insertion position (head/tail). 1392 */ 1393 void 1394 dld_tx_enqueue(dld_str_t *dsp, mblk_t *mp, boolean_t head_insert) 1395 { 1396 mblk_t *tail; 1397 queue_t *q = dsp->ds_wq; 1398 uint_t cnt, msgcnt; 1399 uint_t tot_cnt, tot_msgcnt; 1400 1401 ASSERT(DB_TYPE(mp) == M_DATA); 1402 /* Calculate total size and count of the packet(s) */ 1403 for (tail = mp, cnt = msgdsize(mp), msgcnt = 1; 1404 tail->b_next != NULL; tail = tail->b_next) { 1405 ASSERT(DB_TYPE(tail) == M_DATA); 1406 cnt += msgdsize(tail); 1407 msgcnt++; 1408 } 1409 1410 mutex_enter(&dsp->ds_tx_list_lock); 1411 /* 1412 * If the queue depth would exceed the allowed threshold, drop 1413 * new packet(s) and drain those already in the queue. 
1414 */ 1415 tot_cnt = dsp->ds_tx_cnt + cnt; 1416 tot_msgcnt = dsp->ds_tx_msgcnt + msgcnt; 1417 1418 if (!head_insert && 1419 (tot_cnt >= dld_max_q_count || tot_msgcnt >= dld_max_q_count)) { 1420 ASSERT(dsp->ds_tx_qbusy); 1421 mutex_exit(&dsp->ds_tx_list_lock); 1422 freemsgchain(mp); 1423 goto done; 1424 } 1425 1426 /* Update the queue size parameters */ 1427 dsp->ds_tx_cnt = tot_cnt; 1428 dsp->ds_tx_msgcnt = tot_msgcnt; 1429 1430 /* 1431 * If the transmit queue is currently empty and we are 1432 * about to deposit the packet(s) there, switch mode to 1433 * "busy" and raise flow-control condition. 1434 */ 1435 if (!dsp->ds_tx_qbusy) { 1436 dsp->ds_tx_qbusy = B_TRUE; 1437 ASSERT(dsp->ds_tx_flow_mp != NULL); 1438 (void) putq(q, dsp->ds_tx_flow_mp); 1439 dsp->ds_tx_flow_mp = NULL; 1440 } 1441 1442 if (!head_insert) { 1443 /* Tail insertion */ 1444 if (dsp->ds_tx_list_head == NULL) 1445 dsp->ds_tx_list_head = mp; 1446 else 1447 dsp->ds_tx_list_tail->b_next = mp; 1448 dsp->ds_tx_list_tail = tail; 1449 } else { 1450 /* Head insertion */ 1451 tail->b_next = dsp->ds_tx_list_head; 1452 if (dsp->ds_tx_list_head == NULL) 1453 dsp->ds_tx_list_tail = tail; 1454 dsp->ds_tx_list_head = mp; 1455 } 1456 mutex_exit(&dsp->ds_tx_list_lock); 1457 done: 1458 /* Schedule service thread to drain the transmit queue */ 1459 qenable(q); 1460 } 1461 1462 void 1463 dld_tx_flush(dld_str_t *dsp) 1464 { 1465 mutex_enter(&dsp->ds_tx_list_lock); 1466 if (dsp->ds_tx_list_head != NULL) { 1467 freemsgchain(dsp->ds_tx_list_head); 1468 dsp->ds_tx_list_head = dsp->ds_tx_list_tail = NULL; 1469 dsp->ds_tx_cnt = dsp->ds_tx_msgcnt = 0; 1470 if (dsp->ds_tx_qbusy) { 1471 dsp->ds_tx_flow_mp = getq(dsp->ds_wq); 1472 ASSERT(dsp->ds_tx_flow_mp != NULL); 1473 dsp->ds_tx_qbusy = B_FALSE; 1474 } 1475 } 1476 mutex_exit(&dsp->ds_tx_list_lock); 1477 } 1478 1479 /* 1480 * Process an M_IOCTL message. 
1481 */ 1482 static void 1483 dld_ioc(dld_str_t *dsp, mblk_t *mp) 1484 { 1485 uint_t cmd; 1486 1487 cmd = ((struct iocblk *)mp->b_rptr)->ioc_cmd; 1488 ASSERT(dsp->ds_type == DLD_DLPI); 1489 1490 switch (cmd) { 1491 case DLIOCRAW: 1492 ioc_raw(dsp, mp); 1493 break; 1494 case DLIOCHDRINFO: 1495 ioc_fast(dsp, mp); 1496 break; 1497 default: 1498 ioc(dsp, mp); 1499 } 1500 } 1501 1502 /* 1503 * DLIOCRAW 1504 */ 1505 static void 1506 ioc_raw(dld_str_t *dsp, mblk_t *mp) 1507 { 1508 queue_t *q = dsp->ds_wq; 1509 1510 rw_enter(&dsp->ds_lock, RW_WRITER); 1511 if (dsp->ds_polling || dsp->ds_soft_ring) { 1512 rw_exit(&dsp->ds_lock); 1513 miocnak(q, mp, 0, EPROTO); 1514 return; 1515 } 1516 1517 if (dsp->ds_mode != DLD_RAW && dsp->ds_dlstate == DL_IDLE) { 1518 /* 1519 * Set the receive callback. 1520 */ 1521 dls_rx_set(dsp->ds_dc, dld_str_rx_raw, (void *)dsp); 1522 1523 /* 1524 * Note that raw mode is enabled. 1525 */ 1526 dsp->ds_mode = DLD_RAW; 1527 } 1528 1529 rw_exit(&dsp->ds_lock); 1530 miocack(q, mp, 0, 0); 1531 } 1532 1533 /* 1534 * DLIOCHDRINFO 1535 */ 1536 static void 1537 ioc_fast(dld_str_t *dsp, mblk_t *mp) 1538 { 1539 dl_unitdata_req_t *dlp; 1540 off_t off; 1541 size_t len; 1542 const uint8_t *addr; 1543 uint16_t sap; 1544 mblk_t *nmp; 1545 mblk_t *hmp; 1546 uint_t addr_length; 1547 queue_t *q = dsp->ds_wq; 1548 int err; 1549 dls_channel_t dc; 1550 1551 if (dld_opt & DLD_OPT_NO_FASTPATH) { 1552 err = ENOTSUP; 1553 goto failed; 1554 } 1555 1556 nmp = mp->b_cont; 1557 if (nmp == NULL || MBLKL(nmp) < sizeof (dl_unitdata_req_t) || 1558 (dlp = (dl_unitdata_req_t *)nmp->b_rptr, 1559 dlp->dl_primitive != DL_UNITDATA_REQ)) { 1560 err = EINVAL; 1561 goto failed; 1562 } 1563 1564 off = dlp->dl_dest_addr_offset; 1565 len = dlp->dl_dest_addr_length; 1566 1567 if (!MBLKIN(nmp, off, len)) { 1568 err = EINVAL; 1569 goto failed; 1570 } 1571 1572 rw_enter(&dsp->ds_lock, RW_READER); 1573 if (dsp->ds_dlstate != DL_IDLE) { 1574 rw_exit(&dsp->ds_lock); 1575 err = ENOTSUP; 1576 goto 
failed; 1577 } 1578 1579 addr_length = dsp->ds_mip->mi_addr_length; 1580 if (len != addr_length + sizeof (uint16_t)) { 1581 rw_exit(&dsp->ds_lock); 1582 err = EINVAL; 1583 goto failed; 1584 } 1585 1586 addr = nmp->b_rptr + off; 1587 sap = *(uint16_t *)(nmp->b_rptr + off + addr_length); 1588 dc = dsp->ds_dc; 1589 1590 if ((hmp = dls_header(dc, addr, sap, dsp->ds_pri)) == NULL) { 1591 rw_exit(&dsp->ds_lock); 1592 err = ENOMEM; 1593 goto failed; 1594 } 1595 1596 /* 1597 * This is a performance optimization. We originally entered 1598 * as reader and only become writer upon transitioning into 1599 * the DLD_FASTPATH mode for the first time. Otherwise we 1600 * stay as reader and return the fast-path header to IP. 1601 */ 1602 if (dsp->ds_mode != DLD_FASTPATH) { 1603 if (!rw_tryupgrade(&dsp->ds_lock)) { 1604 rw_exit(&dsp->ds_lock); 1605 rw_enter(&dsp->ds_lock, RW_WRITER); 1606 1607 /* 1608 * State may have changed before we re-acquired 1609 * the writer lock in case the upgrade failed. 1610 */ 1611 if (dsp->ds_dlstate != DL_IDLE) { 1612 rw_exit(&dsp->ds_lock); 1613 err = ENOTSUP; 1614 goto failed; 1615 } 1616 } 1617 1618 /* 1619 * Set the receive callback (unless polling is enabled). 1620 */ 1621 if (!dsp->ds_polling && !dsp->ds_soft_ring) 1622 dls_rx_set(dc, dld_str_rx_fastpath, (void *)dsp); 1623 1624 /* 1625 * Note that fast-path mode is enabled. 1626 */ 1627 dsp->ds_mode = DLD_FASTPATH; 1628 } 1629 rw_exit(&dsp->ds_lock); 1630 1631 freemsg(nmp->b_cont); 1632 nmp->b_cont = hmp; 1633 1634 miocack(q, mp, MBLKL(nmp) + MBLKL(hmp), 0); 1635 return; 1636 failed: 1637 miocnak(q, mp, 0, err); 1638 } 1639 1640 /* 1641 * Catch-all handler. 
1642 */ 1643 static void 1644 ioc(dld_str_t *dsp, mblk_t *mp) 1645 { 1646 queue_t *q = dsp->ds_wq; 1647 mac_handle_t mh; 1648 1649 rw_enter(&dsp->ds_lock, RW_READER); 1650 if (dsp->ds_dlstate == DL_UNATTACHED) { 1651 rw_exit(&dsp->ds_lock); 1652 miocnak(q, mp, 0, EINVAL); 1653 return; 1654 } 1655 mh = dsp->ds_mh; 1656 ASSERT(mh != NULL); 1657 rw_exit(&dsp->ds_lock); 1658 mac_ioctl(mh, q, mp); 1659 } 1660 1661 /* 1662 * Allocate a new minor number. 1663 */ 1664 static minor_t 1665 dld_minor_hold(boolean_t sleep) 1666 { 1667 minor_t minor; 1668 1669 /* 1670 * Grab a value from the arena. 1671 */ 1672 atomic_add_32(&minor_count, 1); 1673 if ((minor = PTR_TO_MINOR(vmem_alloc(minor_arenap, 1, 1674 (sleep) ? VM_SLEEP : VM_NOSLEEP))) == 0) { 1675 atomic_add_32(&minor_count, -1); 1676 return (0); 1677 } 1678 1679 return (minor); 1680 } 1681 1682 /* 1683 * Release a previously allocated minor number. 1684 */ 1685 static void 1686 dld_minor_rele(minor_t minor) 1687 { 1688 /* 1689 * Return the value to the arena. 1690 */ 1691 vmem_free(minor_arenap, MINOR_TO_PTR(minor), 1); 1692 1693 atomic_add_32(&minor_count, -1); 1694 } 1695